windows-nt/Source/XPSP1/NT/enduser/speech/tts/ms_entropic/frontend.cpp

4034 lines
112 KiB
C++
Raw Normal View History

2020-09-26 03:20:57 -05:00
/*******************************************************************************
* Frontend.cpp *
*--------------*
* Description:
* This module is the main implementation file for the CFrontend class.
*-------------------------------------------------------------------------------
* Created By: mc Date: 03/12/99
* Copyright (C) 1999 Microsoft Corporation
* All Rights Reserved
*
*******************************************************************************/
//--- Additional includes
#include "stdafx.h"
#include "ms_entropicengine.h"
#include "Frontend.h"
#include "spdebug.h"
#include "FeedChain.h"
#include "AlloOps.h"
#include "sapi.h"
#include "StdSentEnum.h"
// Forward declaration of a file-local helper (definition is not in this part of the file).
static bool IsVowel ( char* ph );
//-----------------------------
// Data.cpp
//-----------------------------
// Lookup tables supplied by Data.cpp:
//   g_IPAToAllo - searched by IPA_to_Allo(); the index of a matching entry is the allo code.
//   g_RateScale - indexed by the magnitude of the user rate control in CFrontend::CntrlToRatio().
extern const short g_IPAToAllo[];
extern const float g_RateScale[];
//-----------------------------------------------------------------
// GetPhoneF0
//   Return the mean of the F0 contour samples covered by a phone.
//
//   pF0Contour  - contour buffer, one sample every PITCH_BUF_RES
//   CurrentTime - start time of the phone (same unit as PITCH_BUF_RES)
//   Length      - duration of the phone (same unit as PITCH_BUF_RES)
//
//   Both ends of the window are rounded to the nearest sample index.
//   If the window covers no samples (zero or negative length after
//   rounding) the function returns 0 instead of dividing by zero.
//-----------------------------------------------------------------
inline short GetPhoneF0( float *pF0Contour, float CurrentTime, float Length )
{
    int startIndex = (int) ( CurrentTime / PITCH_BUF_RES + 0.5 );
    int endIndex   = (int) ( ( CurrentTime + Length ) / PITCH_BUF_RES + 0.5 );

    //--- Empty window: 0/0 would give NaN, and NaN -> short is undefined
    if( endIndex <= startIndex )
    {
        return 0;
    }

    float Total = 0;
    for ( int i = startIndex; i < endIndex; i++ )
    {
        Total += pF0Contour[i];
    }
    Total /= endIndex - startIndex;
    return (short) Total;
}
//-----------------------------------------------------------------
// OldMapPhoneSet
//   Translate an allo code into its phone name in the "old" phone
//   set. Stress and syllable markers map to the empty string, and
//   so does any code that is not in the table.
//   This table differs from NewMapPhoneSet() in the names used for
//   _ER_ ("axr") and _h_ ("hh").
//-----------------------------------------------------------------
const char* OldMapPhoneSet (ALLO_CODE code)
{
    static struct tagOldPhoneEntry {
        const char* name;
        ALLO_CODE   code;
    } s_oldPhones [] = {
        //--- Vowels
        {"iy",  _IY_},
        {"ih",  _IH_},
        {"eh",  _EH_},
        {"ae",  _AE_},
        {"aa",  _AA_},
        {"ah",  _AH_},
        {"ao",  _AO_},
        {"uh",  _UH_},
        {"ax",  _AX_},
        {"axr", _ER_},      // the new phone set calls this "er"
        {"ey",  _EY_},
        {"ay",  _AY_},
        {"oy",  _OY_},
        {"aw",  _AW_},
        {"ow",  _OW_},
        {"uw",  _UW_},
        {"ix",  _IX_},
        //--- Silence
        {"sil", _SIL_},
        //--- Consonants
        {"w",   _w_},
        {"y",   _y_},
        {"r",   _r_},
        {"l",   _l_},
        {"hh",  _h_},
        {"m",   _m_},
        {"n",   _n_},
        {"ng",  _NG_},
        {"f",   _f_},
        {"v",   _v_},
        {"th",  _TH_},
        {"dh",  _DH_},
        {"s",   _s_},
        {"z",   _z_},
        {"sh",  _SH_},
        {"zh",  _ZH_},
        {"p",   _p_},
        {"b",   _b_},
        {"t",   _t_},
        {"d",   _d_},
        {"k",   _k_},
        {"g",   _g_},
        {"ch",  _CH_},
        {"jh",  _JH_},
        {"dx",  _DX_},
        //--- Markers carry no phone name
        {"",    _STRESS1_},
        {"",    _STRESS2_},
        {"",    _EMPHSTRESS_},
        {"",    _SYLLABLE_}
    };
    static int s_cOldPhones = sizeof (s_oldPhones) / sizeof (s_oldPhones[0]);

    //--- Linear search; the first matching entry wins
    const char* pName = "";
    for ( int idx = 0; idx < s_cOldPhones; ++idx )
    {
        if ( s_oldPhones[idx].code == code )
        {
            pName = s_oldPhones[idx].name;
            break;
        }
    }
    return pName;
}
//-----------------------------------------------------------------
// NewMapPhoneSet
//   Translate an allo code into its phone name in the "new" phone
//   set. Stress and syllable markers map to the empty string, and
//   so does any code that is not in the table.
//   This table differs from OldMapPhoneSet() in the names used for
//   _ER_ ("er") and _h_ ("h").
//-----------------------------------------------------------------
const char* NewMapPhoneSet (ALLO_CODE code)
{
    static struct tagNewPhoneEntry {
        const char* name;
        ALLO_CODE   code;
    } s_newPhones [] = {
        //--- Vowels
        {"iy",  _IY_},
        {"ih",  _IH_},
        {"eh",  _EH_},
        {"ae",  _AE_},
        {"aa",  _AA_},
        {"ah",  _AH_},
        {"ao",  _AO_},
        {"uh",  _UH_},
        {"ax",  _AX_},
        {"er",  _ER_},      // the old phone set calls this "axr"
        {"ey",  _EY_},
        {"ay",  _AY_},
        {"oy",  _OY_},
        {"aw",  _AW_},
        {"ow",  _OW_},
        {"uw",  _UW_},
        {"ix",  _IX_},
        //--- Silence
        {"sil", _SIL_},
        //--- Consonants
        {"w",   _w_},
        {"y",   _y_},
        {"r",   _r_},
        {"l",   _l_},
        {"h",   _h_},
        {"m",   _m_},
        {"n",   _n_},
        {"ng",  _NG_},
        {"f",   _f_},
        {"v",   _v_},
        {"th",  _TH_},
        {"dh",  _DH_},
        {"s",   _s_},
        {"z",   _z_},
        {"sh",  _SH_},
        {"zh",  _ZH_},
        {"p",   _p_},
        {"b",   _b_},
        {"t",   _t_},
        {"d",   _d_},
        {"k",   _k_},
        {"g",   _g_},
        {"ch",  _CH_},
        {"jh",  _JH_},
        {"dx",  _DX_},
        //--- Markers carry no phone name
        {"",    _STRESS1_},
        {"",    _STRESS2_},
        {"",    _EMPHSTRESS_},
        {"",    _SYLLABLE_}
    };
    static int s_cNewPhones = sizeof (s_newPhones) / sizeof (s_newPhones[0]);

    //--- Linear search; the first matching entry wins
    const char* pName = "";
    for ( int idx = 0; idx < s_cNewPhones; ++idx )
    {
        if ( s_newPhones[idx].code == code )
        {
            pName = s_newPhones[idx].name;
            break;
        }
    }
    return pName;
}
/*****************************************************************************
* CFrontend::CFrontend *
*----------------------*
* Description:
* Put the object in its empty state: no source object, no allo list,
* no pending units, and the old phone set selected.
*
********************************************************************** MC ***/
CFrontend::CFrontend()
{
    SPDBG_FUNC( "CFrontend::CFrontend" );

    //--- Nothing attached yet
    m_pSrcObj = NULL;
    m_pAllos = NULL;
#ifdef USE_VOICEDATAOBJ
    m_pUnits = NULL;
#endif

    //--- No units buffered
    m_CurUnitIndex = 0;
    m_unitCount = 0;

    //--- Old phone set until Init() says otherwise
    m_fNewPhoneSet = FALSE;
} /* CFrontend::CFrontend */
/*****************************************************************************
* CFrontend::~CFrontend *
*-----------------------*
* Description:
* Release the unit buffer (when built with USE_VOICEDATAOBJ), the allo
* list and the token list.
*
********************************************************************** MC ***/
CFrontend::~CFrontend()
{
    SPDBG_FUNC( "CFrontend::~CFrontend" );
#ifdef USE_VOICEDATAOBJ
    DisposeUnits();
#endif
    //--- 'delete' on a NULL pointer is a no-op, so no test is needed
    delete m_pAllos;
    m_pAllos = NULL;

    DeleteTokenList();
} /* CFrontend::~CFrontend */
/*****************************************************************************
* CFrontend::CntrlToRatio *
*-------------------------*
* Description:
* Convert a user rate control value into a rate ratio.
* The control is clipped to [MIN_USER_RATE, MAX_USER_RATE]. Zero and
* positive values return g_RateScale[control]; negative values return
* the reciprocal of g_RateScale[-control].
*
********************************************************************** MC ***/
float CFrontend::CntrlToRatio( long rateControl )
{
    SPDBG_FUNC( "CFrontend::CntrlToRatio" );

    if( rateControl >= 0 )
    {
        //--------------------------------
        // INCREASE the rate (clip to max)
        //--------------------------------
        long speedUp = (rateControl > MAX_USER_RATE) ? MAX_USER_RATE : rateControl;
        return ::g_RateScale[speedUp];
    }

    //--------------------------------
    // DECREASE the rate (clip to min)
    //--------------------------------
    long slowDown = (rateControl < MIN_USER_RATE) ? MIN_USER_RATE : rateControl;
    return 1.0f / ::g_RateScale[-slowDown];
} /* CFrontend::CntrlToRatio */
/*****************************************************************************
* CFrontend::Init *
*-----------------*
* Description:
* Init voice dependent variables, call once when object is created.
*
* pVoiceDataObj - voice data object (only stored when USE_VOICEDATAOBJ is set)
* pSrcObj       - upstream feed-chain object, stored in m_pSrcObj
* pVoiceInfo    - voice info; ProsodyGain and SampleRate are read from it
*                 (only when USE_VOICEDATAOBJ is set)
* PitchInfo     - supplies the base pitch and the pitch range
* fNewPhoneSet  - selects the new phone set
*
* Always returns S_OK.
*
********************************************************************** MC ***/
#ifdef USE_VOICEDATAOBJ
HRESULT CFrontend::Init( CVoiceData* pVoiceDataObj, CFeedChain *pSrcObj, MSVOICEINFO* pVoiceInfo,
                         EntropicPitchInfo PitchInfo, bool fNewPhoneSet )
#else
HRESULT CFrontend::Init( void* pVoiceDataObj, CFeedChain *pSrcObj, void* pVoiceInfo,
                         EntropicPitchInfo PitchInfo, bool fNewPhoneSet )
#endif
{
    SPDBG_FUNC( "CFrontend::Init" );
    HRESULT hr = S_OK;

    m_pSrcObj = pSrcObj;
    m_BasePitch = PitchInfo.BasePitch;
#ifdef USE_VOICEDATAOBJ
    m_pVoiceDataObj = pVoiceDataObj;
    //--- ProsodyGain is stored as a percentage
    m_ProsodyGain = ((float)pVoiceInfo->ProsodyGain) / 100.0f;
    m_SampleRate = (float)pVoiceInfo->SampleRate;
#endif
    // NOTE: move these to voice data?
    // m_VoiceWPM = pVoiceInfo->Rate;
    // m_PitchRange = pVoiceInfo->PitchRange;
    m_VoiceWPM = 180;                   // fixed value, not taken from the voice
    m_PitchRange = PitchInfo.Range;

    //--- Both rate ratios start out neutral
    m_RateRatio_API = m_RateRatio_PROSODY = 1.0f;
    m_fNewPhoneSet = fNewPhoneSet;
    return hr;
} /* CFrontend::Init */
//-----------------------------------------------------------------
// IPA_to_Allo
//   Convert a zero-terminated IPA string into allo codes.
//
//   pSrc  - source IPA characters, terminated by 0
//   pDest - receives one ALLO_CODE per recognized character
//
//   Each source character is looked up in g_IPAToAllo; the index of
//   the first matching entry is the allo code. Characters with no
//   match are skipped. Output stops at SP_MAX_PRON_LENGTH-1 codes.
//   Returns the number of codes written to pDest.
//-----------------------------------------------------------------
static ULONG IPA_to_Allo( WCHAR* pSrc, ALLO_CODE* pDest )
{
    ULONG cAllo = 0;

    for( WCHAR* pIpa = pSrc; *pIpa > 0; pIpa++ )
    {
        //-----------------------------------------
        // Search for single word IPA's
        //-----------------------------------------
        for( ULONG alloIndex = 0; alloIndex < NUMBER_OF_ALLO; alloIndex++ )
        {
            if( *pIpa == g_IPAToAllo[alloIndex] )
            {
                pDest[cAllo] = (ALLO_CODE)alloIndex;
                cAllo++;
                break;
            }
        }
        // No match means unsupported IPA unicode: ignore it and go on.

        //----------------------------------
        // Clip at max length
        //----------------------------------
        if( cAllo >= (SP_MAX_PRON_LENGTH-1) )
        {
            cAllo = SP_MAX_PRON_LENGTH-1;
            break;
        }
    }
    return cAllo;
}
/*****************************************************************************
* CFrontend::AlloToUnit *
*-----------------------*
* Description:
* Transform ALLO stream into backend UNIT stream.
* Walks the allo list and fills one UNITINFO per cell, starting at 'pu'.
* Stops at the first cell the voice data object rejects and returns that
* failure code; otherwise returns S_OK.
*
********************************************************************** MC ***/
#ifdef USE_VOICEDATAOBJ
HRESULT CFrontend::AlloToUnit( CAlloList *pAllos, UNITINFO *pu )
{
    SPDBG_FUNC( "CFrontend::AlloToUnit" );
    HRESULT     hr = S_OK;
    bool        fNeedSentStart = true;      // sentence flag not yet placed
    ULONG       numOfCells;
    CAlloCell   *pCell, *pLookAhead;

    numOfCells = pAllos->GetCount();        // value is not used below
    pCell = pAllos->GetHeadCell();
    pLookAhead = pAllos->GetNextCell();

    while( pCell )
    {
        //--------------------------------------
        // Get next allo ID (silence at the end)
        //--------------------------------------
        if( pLookAhead )
        {
            pu->NextAlloID = (USHORT)pLookAhead->m_allo;
        }
        else
        {
            pu->NextAlloID = _SIL_;
        }

        //--------------------------------------
        // Convert to Whistler phon code
        //--------------------------------------
        long phonAttr = 0;
        long msPhon;
        if( pCell->m_ctrlFlags & PRIMARY_STRESS )
        {
            phonAttr |= ALLO_IS_STRESSED;
        }
        hr = m_pVoiceDataObj->AlloToUnit( (short)pCell->m_allo, phonAttr, &msPhon );
        if( FAILED(hr) )
        {
            //------------------------
            // allo ID is invalid
            //------------------------
            break;
        }

        //--------------------------------------
        // Identity and control flags
        //--------------------------------------
        pu->PhonID = msPhon;
        pu->AlloID = (USHORT)pCell->m_allo;
        pu->ctrlFlags = pCell->m_ctrlFlags;
        pu->flags = 0;
        pu->AlloFeatures = 0;

        //--------------------------------------
        // Flag WORD boundary and remember the
        // source word position and length
        //--------------------------------------
        if( pCell->m_ctrlFlags & WORD_START )
        {
            pu->flags |= WORD_START_FLAG;
            pu->srcPosition = pCell->m_SrcPosition;
            pu->srcLen = pCell->m_SrcLen;
        }

        //----------------------------------------------------
        // Flag SENTENCE boundary on 1st displayable word
        // and remember the sentence position and length
        //----------------------------------------------------
        if( fNeedSentStart && (pCell->m_SentenceLen > 0) )
        {
            fNeedSentStart = false;
            pu->flags |= SENT_START_FLAG;
            pu->sentencePosition = pCell->m_SentencePosition;
            pu->sentenceLen = pCell->m_SentenceLen;
        }

        // The per-knot time/F0/amp arrays are not filled in here.
        pu->nKnots = KNOTS_PER_PHON;

        //----------------------------
        // Controls and events
        // (the bookmark object pointer moves from cell to unit)
        //----------------------------
        pu->user_Volume = pCell->m_user_Volume;
        pu->pBMObj = (void*)pCell->m_pBMObj;
        pCell->m_pBMObj = NULL;

        //----------------------------------------
        // Pass features for viseme event
        //----------------------------------------
        if( pCell->m_ctrlFlags & PRIMARY_STRESS )
        {
            pu->AlloFeatures |= SPVFEATURE_STRESSED;
        }
        if( pCell->m_ctrlFlags & EMPHATIC_STRESS )
        {
            pu->AlloFeatures |= SPVFEATURE_EMPHASIS;
        }

        pu->duration = PITCH_BUF_RES;
        pu->silenceSource = pCell->m_SilenceSource;

        //--------------------------------------
        // Advance output unit and input cells
        //--------------------------------------
        pu++;
        pCell = pLookAhead;
        pLookAhead = pAllos->GetNextCell();
    }
    return hr;
} /* CFrontend::AlloToUnit */
#endif
/*****************************************************************************
* CFrontend::PrepareSpeech *
*--------------------------*
* Description:
* Prepare frontend for new speech: remember the sentence enumerator and
* output site, and reset the unit counters and prosody state.
*
********************************************************************** MC ***/
void CFrontend::PrepareSpeech( IEnumSpSentence* pEnumSent, ISpTTSEngineSite *pOutputSite )
{
    SPDBG_FUNC( "CFrontend::PrepareSpeech" );

    //--- Input and output for this speak call
    m_pEnumSent = pEnumSent;
    m_pOutputSite = pOutputSite;

    //--- No units buffered, nothing spoken yet
    m_SpeechState = SPEECH_CONTINUE;
    m_unitCount = 0;
    m_CurUnitIndex = 0;
    m_HasSpeech = false;

    //--- Neutral prosody: outside quotes and parentheses
    m_fInParenProsody = false;
    m_fInQuoteProsody = false;
    m_CurPitchOffs = 0;
    m_CurPitchRange = 1.0;
} /* CFrontend::PrepareSpeech */
/*****************************************************************************
* fIsPunctuation *
*----------------*
* Description:
* Return true if the item type is a comma, semicolon, colon, period,
* question mark, exclamation mark or hyphen.
*
********************************************************************** MC ***/
bool fIsPunctuation( TTSSentItem Item )
{
    SPDBG_FUNC( "IsTokenPunct" );
    switch( Item.pItemInfo->Type )
    {
        case eCOMMA:
        case eSEMICOLON:
        case eCOLON:
        case ePERIOD:
        case eQUESTION:
        case eEXCLAMATION:
        case eHYPHEN:
            return true;

        default:
            return false;
    }
} /* fIsPunctuation */
/*****************************************************************************
* CFrontend::ToBISymbols *
*------------------------*
* Description:
* Label each word in m_TokList with ToBI prosody notation.
*
* 1. Group the tokens into phrases wherever the POS class changes.
* 2. Put a low accent on the last word of a multi-word function/aux
*    phrase and a high accent on the first word of a content/unknown
*    phrase (only if the word has no accent yet).
* 3. For every token carrying a tune boundary type, set accent and
*    boundary tags on the token that precedes it.
* 4. If any token has user emphasis, boost emphasized words and clip
*    the prominence of the others.
*
* Does nothing for an empty token list. Always returns S_OK.
*
********************************************************************** MC ***/
HRESULT CFrontend::ToBISymbols()
{
    SPDBG_FUNC( "CFrontend::ToBISymbols" );
    TOBI_PHRASE *pTPhrase;
    long        i, cPhrases;
    PROSODY_POS prevPOS, curPOS;
    bool        possible_YNQ = false;
    long        cTok;
    CFEToken    *pTok, *pPrevTok, *pAuxTok;
    bool        hasEmph = false;
    SPLISTPOS   listPos;

    //----------------------------------
    // Get memory for phrase array
    //----------------------------------
    pAuxTok = NULL;     // To quiet the compiler
    cTok = m_TokList.GetCount();
    if( cTok )
    {
        pTPhrase = new TOBI_PHRASE[cTok];   // worse case: each token is a phrase
        if( pTPhrase )
        {
            //---------------------------------------------
            // Find sub-phrases from POS
            // For now, detect function/content boundaries
            //---------------------------------------------
            hasEmph = false;
            cPhrases = 0;
            i = 0;
            listPos = m_TokList.GetHeadPosition();
            pTok = m_TokList.GetNext( listPos );
            prevPOS = pTok->m_posClass;

            //--- Skip leading silence tokens
            while( pTok->phon_Str[0] == _SIL_ )
            {
                if( i >= (cTok-1) )
                {
                    break;
                }
                i++;
                if( listPos != NULL )
                {
                    pTok = m_TokList.GetNext( listPos );
                }
            }
            if( pTok->m_posClass == POS_AUX )
            {
                //---------------------------------
                // Could be a yes/no question
                //---------------------------------
                possible_YNQ = true;
                pAuxTok = pTok;
            }

            //--- Close a phrase every time the POS class changes on a non-silent token
            pTPhrase[cPhrases].start = i;
            for( ; i < cTok; i++ )
            {
                curPOS = pTok->m_posClass;
                if( (curPOS != prevPOS) && (pTok->phon_Str[0] != _SIL_) )
                {
                    pTPhrase[cPhrases].posClass = prevPOS;
                    pTPhrase[cPhrases].end = i-1;
                    cPhrases++;
                    pTPhrase[cPhrases].start = i;
                    prevPOS = curPOS;
                }
                if( pTok->user_Emph > 0 )
                {
                    hasEmph = true;
                }
                if( listPos != NULL )
                {
                    pTok = m_TokList.GetNext( listPos );
                }
            }
            //-------------------------------
            // Complete last phrase
            //-------------------------------
            pTPhrase[cPhrases].posClass = prevPOS;
            pTPhrase[cPhrases].end = i-1;
            cPhrases++;

            for( i = 0; i < cPhrases; i++ )
            {
                //-------------------------------------------------------
                // Sequence of function words, place a low tone
                // on the LAST word in a func sequence,
                // if there are more than 1 words in the sequence.
                //-------------------------------------------------------
                if( ((pTPhrase[i].posClass == POS_FUNC) || (pTPhrase[i].posClass == POS_AUX)) &&
                    (pTPhrase[i].end - pTPhrase[i].start) )
                {
                    pTok = (CFEToken*)m_TokList.GetAt( m_TokList.FindIndex( pTPhrase[i].end ));
                    if( pTok->m_Accent == K_NOACC )
                    {
                        pTok->m_Accent = K_LSTAR;
                        pTok->m_Accent_Prom = 2;
                        pTok->m_AccentSource = ACC_FunctionSeq;
                    }
                }
                //-------------------------------------------------------
                // Sequence of content words, place a high or
                // rising tone, of random prominence,
                // on the FIRST word in the content sequence
                //-------------------------------------------------------
                else if ( ((pTPhrase[i].posClass == POS_CONTENT) || (pTPhrase[i].posClass == POS_UNK)) )
                {
                    pTok = (CFEToken*)m_TokList.GetAt( m_TokList.FindIndex( pTPhrase[i].start ));
                    if( pTok->m_Accent == K_NOACC )
                    {
                        pTok->m_Accent = K_HSTAR;
                        pTok->m_Accent_Prom = rand() % 5;
                        pTok->m_AccentSource = ACC_ContentSeq;
                    }
                }
            }

            //--- Allocated with new[], so it must be released with delete[]
            delete [] pTPhrase;

            //-----------------------------------------
            // Now, insert the BOUNDARY tags
            //-----------------------------------------
            listPos = m_TokList.GetHeadPosition();
            pPrevTok = m_TokList.GetNext( listPos );
            for( i = 1; i < cTok; i++ )
            {
                pTok = m_TokList.GetNext( listPos );
                //--------------------------------
                // Place a terminal boundary
                //--------------------------------
                if( pTok->m_TuneBoundaryType != NULL_BOUNDARY )
                {
                    switch( pTok->m_TuneBoundaryType )
                    {
                        case YN_QUEST_BOUNDARY:
                        {
                            pPrevTok->m_Accent = K_LSTAR;
                            pPrevTok->m_Accent_Prom = 10;
                            pPrevTok->m_Boundary = K_HMINUSHPERC;
                            pPrevTok->m_Boundary_Prom = 10;
                            //-- Diagnostic
                            if( pPrevTok->m_AccentSource == ACC_NoSource )
                            {
                                pPrevTok->m_AccentSource = ACC_YNQuest;
                            }
                            //-- Diagnostic
                            if( pPrevTok->m_BoundarySource == BND_NoSource )
                            {
                                pPrevTok->m_BoundarySource = BND_YNQuest;
                            }
                            //-------------------------------------------------------
                            // Accent an aux verb in initial position (possible ynq)
                            //-------------------------------------------------------
                            if( possible_YNQ )
                            {
                                pAuxTok->m_Accent = K_HSTAR;
                                pAuxTok->m_Accent_Prom = 5;
                                pAuxTok->m_AccentSource = ACC_InitialVAux;
                            }
                        }
                        break;

                        case WH_QUEST_BOUNDARY:
                        case DECLAR_BOUNDARY:
                        case EXCLAM_BOUNDARY:
                        {
                            if (pPrevTok->m_posClass == POS_CONTENT)
                            {
                                pPrevTok->m_Accent = K_HSTAR;
                                pPrevTok->m_Accent_Prom = 4;
                                //-- Diagnostic
                                if( pPrevTok->m_AccentSource == ACC_NoSource )
                                {
                                    pPrevTok->m_AccentSource = ACC_Period;
                                }
                            }
                            pPrevTok->m_Boundary = K_LMINUSLPERC;
                            pPrevTok->m_Boundary_Prom = 10;
                            //--- Diagnostic
                            if( pPrevTok->m_BoundarySource == BND_NoSource )
                            {
                                pPrevTok->m_BoundarySource = BND_Period;
                            }
                        }
                        break;

                        case PHRASE_BOUNDARY:
                        {
                            if (pPrevTok->m_posClass == POS_CONTENT)
                            {
                                pPrevTok->m_Accent = K_LHSTAR;
                                pPrevTok->m_Accent_Prom = 10;
                                //-- Diagnostic
                                if( pPrevTok->m_AccentSource == ACC_NoSource )
                                {
                                    pPrevTok->m_AccentSource = ACC_Comma;
                                }
                            }
                            pPrevTok->m_Boundary = K_LMINUSHPERC;
                            pPrevTok->m_Boundary_Prom = 5;
                            //-- Diagnostic
                            if( pPrevTok->m_BoundarySource == BND_NoSource )
                            {
                                pPrevTok->m_BoundarySource = BND_Comma;
                            }
                        }
                        break;

                        case NUMBER_BOUNDARY:
                        {
                            pPrevTok->m_Boundary = K_LMINUSHPERC;
                            pPrevTok->m_Boundary_Prom = 5;
                            //-- Diagnostic
                            if( pPrevTok->m_BoundarySource == BND_NoSource )
                            {
                                pPrevTok->m_BoundarySource = BND_NumberTemplate;
                            }
                        }
                        break;

                        default:
                        {
                            // Use comma for all other boundaries
                            if (pPrevTok->m_posClass == POS_CONTENT)
                            {
                                pPrevTok->m_Accent = K_LHSTAR;
                                pPrevTok->m_Accent_Prom = 10;
                                //-- Diagnostic
                                if( pPrevTok->m_AccentSource == ACC_NoSource )
                                {
                                    pPrevTok->m_AccentSource = pTok->m_AccentSource;
                                }
                            }
                            pPrevTok->m_Boundary = K_LMINUSHPERC;
                            pPrevTok->m_Boundary_Prom = 5;
                            //-- Diagnostic
                            if( pPrevTok->m_BoundarySource == BND_NoSource )
                            {
                                pPrevTok->m_BoundarySource = pTok->m_BoundarySource;
                            }
                        }
                        break;
                    }
                }
                pPrevTok = pTok;
            }

            //--------------------------------------------
            // Loop through each word and increase
            // pitch prominence if EMPHASIZED and
            // decrease prominence for all others
            //--------------------------------------------
            if( hasEmph )
            {
                pPrevTok = NULL;
                listPos = m_TokList.GetHeadPosition();
                while( listPos )
                {
                    pTok = m_TokList.GetNext( listPos );
                    //------------------------------
                    // Is this word emphasized?
                    //------------------------------
                    if( pTok->user_Emph > 0 )
                    {
                        //------------------------------
                        // Add my clever H*+L* tag
                        //------------------------------
                        pTok->m_Accent = K_HSTARLSTAR;
                        pTok->m_Accent_Prom = 10;
                        pTok->m_Boundary = K_NOBND;     // Delete any boundary tag here...
                        if( pPrevTok )
                        {
                            pPrevTok->m_Boundary = K_NOBND; // ...or before
                        }
                    }
                    else
                    {
                        //-----------------------------------
                        // Is non-emphasized word accented?
                        //-----------------------------------
                        if( (pTok->m_Accent != K_NOACC) && (pTok->m_Accent_Prom > 5) )
                        {
                            //------------------------------
                            // Then clip its prominence at 5
                            //------------------------------
                            pTok->m_Accent_Prom = 5;
                        }
                        //------------------------------
                        // Is it a boundary?
                        //------------------------------
                        /*if( (pTok->m_Boundary != K_NOBND) && (pTok->m_Boundary_Prom > 5) )
                        {
                            //------------------------------
                            // Then clip its prominence at 5
                            //------------------------------
                            pTok->m_Boundary_Prom = 5;
                        }*/
                    }
                    pPrevTok = pTok;
                }
            }
        }
    }
    return S_OK;
} /* ToBISymbols */
/*****************************************************************************
* CFrontend::TokensToAllo *
*------------------------*
* Description:
* Transform TOKENS into ALLOS.
* Each token is handed to CAlloList::WordToAllo together with its
* previous and next neighbors (NULL at either end of the list) and the
* tail cell of the allo list. m_HasSpeech is set as soon as one call
* returns true.
* Nothing is done when the allo list has no tail cell or the token list
* is empty. Always returns S_OK.
*
********************************************************************** MC ***/
HRESULT CFrontend::TokensToAllo( CFETokenList *pTokList, CAlloList *pAllo )
{
    SPDBG_FUNC( "CFrontend::TokToAllo" );
    CAlloCell   *pLastCell;
    long        i;
    long        cTok;
    CFEToken    *pCurToken, *pNextToken, *pPrevTok;
    SPLISTPOS   listPos;

    pLastCell = pAllo->GetTailCell();       // Get end (silence)
    cTok = pTokList->GetCount();

    //--- Check the count BEFORE fetching the head token, as ToBISymbols() does,
    //--- so an empty list never reaches GetNext() with a NULL position
    if( pLastCell && (cTok > 0) )
    {
        pPrevTok = NULL;
        listPos = pTokList->GetHeadPosition();
        pCurToken = pTokList->GetNext( listPos );
        for( i = 0; i < cTok; i++ )
        {
            //----------------------------
            // Get NEXT word
            //----------------------------
            if( i < (cTok -1) )
            {
                pNextToken = pTokList->GetNext( listPos );
            }
            else
            {
                pNextToken = NULL;
            }
            if( pAllo->WordToAllo( pPrevTok, pCurToken, pNextToken, pLastCell ) )
            {
                m_HasSpeech = true;
            }
            //----------------------------
            // Bump the pipeline
            //----------------------------
            pPrevTok = pCurToken;
            pCurToken = pNextToken;
        }
    }
    return S_OK;
} /* CFrontend::TokensToAllo */
/*****************************************************************************
* CFrontend::GetItemControls *
*----------------------------*
* Description:
* Set user control values from Sent Enum item.
* Copies volume, rate, pitch and emphasis from the XML state into the
* token and derives the token's duration scale from the rate.
* When the combined rate (token * API * prosody) drops below
* DISCRETE_BKPT, the duration scale is pinned at DISCRETE_BKPT and a
* terminal silence of 0.050 / combined-rate is requested instead.
********************************************************************** MC ***/
void CFrontend::GetItemControls( const SPVSTATE* pXmlState, CFEToken* pToken )
{
    SPDBG_FUNC( "CFrontend::GetItemControls" );

    //--- Straight copies from the XML state
    pToken->user_Emph = pXmlState->EmphAdj;
    pToken->user_Pitch = pXmlState->PitchAdj.MiddleAdj;
    pToken->user_Volume = pXmlState->Volume;
    pToken->user_Rate = pXmlState->RateAdj;

    //--- Rate control -> duration scale
    pToken->m_DurScale = CntrlToRatio( pToken->user_Rate );

    //--- Default: no break between words
    pToken->m_TermSil = 0;
    if( (pToken->m_DurScale * m_RateRatio_API * m_RateRatio_PROSODY)
            < DISCRETE_BKPT )
    {
        //-- If the total rate is low enough, insert breaks between words
        //-- (the silence must be computed before m_DurScale is overwritten)
        pToken->m_TermSil = 0.050f /
            (pToken->m_DurScale * m_RateRatio_API * m_RateRatio_PROSODY);
        pToken->m_DurScale = DISCRETE_BKPT;
    }
} /* CFrontend::GetItemControls */
/*****************************************************************************
* CFrontend::GetPOSClass *
*------------------------*
* Description:
* Transform SAPI POS code to func/content/aux class.
* Any code not listed below yields POS_UNK.
********************************************************************** MC ***/
PROSODY_POS CFrontend::GetPOSClass( ENGPARTOFSPEECH sapiPOS )
{
    SPDBG_FUNC( "CFrontend::GetPOSClass" );
    switch( sapiPOS )
    {
        //--- Content words
        case MS_Noun:
        case MS_Verb:
        case MS_Adj:
        case MS_Adv:
        case MS_Interjection:
            return POS_CONTENT;

        //--- Auxiliary verbs
        case MS_VAux:
            return POS_AUX;

        //--- Function words
        case MS_Modifier:
        case MS_Function:
        case MS_Interr:
        case MS_Pron:
        case MS_ObjPron:
        case MS_SubjPron:
        case MS_RelPron:
        case MS_Conj:
        case MS_CConj:
        case MS_Det:
        case MS_Contr:
        case MS_Prep:
            return POS_FUNC;

        default:
            break;
    }
    return POS_UNK;
} /* CFrontend::GetPOSClass */
// Break lengths passed to InsertSilenceAtTail() by the quote and
// parenthesis prosody functions below.
#define QUOTE_HESITATION 100 // Number of msec
#define PAREN_HESITATION 100 // Number of msec
#define PAREN_HESITATION_TAIL 100 // Number of msec
#define EMPH_HESITATION 1 // Number of msec
/*****************************************************************************
* CFrontend::StateQuoteProsody *
*------------------------------*
* Description:
* Toggle quote prosody. Ignored (returns false) while parenthesis
* prosody is active. Entering a quote raises the pitch offset to 0.1
* and the pitch range to 1.25; leaving restores 0.0 and 1.0.
* When fInsertSil is set, pWordTok is appended to the token list as a
* QUOTE_HESITATION silence tagged SIL_QuoteStart or SIL_QuoteEnd.
* Returns true when the state was toggled.
*
********************************************************************** MC ***/
bool CFrontend::StateQuoteProsody( CFEToken *pWordTok, TTSSentItem *pSentItem, bool fInsertSil )
{
    SPDBG_FUNC( "CFrontend::StateQuoteProsody" );

    if( m_fInParenProsody )
    {
        return false;
    }

    //--- Entering a quote if we are not already inside one
    const bool fEntering = !m_fInQuoteProsody;

    //------------------------------
    // Begin or stop quote prosody
    //------------------------------
    m_fInQuoteProsody = fEntering;
    m_CurPitchOffs = fEntering ? 0.1f : 0.0f;
    m_CurPitchRange = fEntering ? 1.25f : 1.0f;

    //--- The silence token picks up the pitch settings chosen above
    if( fInsertSil )
    {
        (void)InsertSilenceAtTail( pWordTok, pSentItem, QUOTE_HESITATION );
        pWordTok->m_SilenceSource = fEntering ? SIL_QuoteStart : SIL_QuoteEnd;
    }
    return true;
} /* CFrontend::StateQuoteProsody */
/*****************************************************************************
* CFrontend::StartParenProsody *
*------------------------------*
* Description:
* Enter parenthesis prosody: pitch offset -0.2, pitch range 0.75 and
* prosody rate ratio 1.25. Ignored (returns false) while quote or
* parenthesis prosody is already active.
* When fInsertSil is set, pWordTok is appended to the token list as a
* PAREN_HESITATION silence tagged SIL_ParenStart.
*
********************************************************************** MC ***/
bool CFrontend::StartParenProsody( CFEToken *pWordTok, TTSSentItem *pSentItem, bool fInsertSil )
{
    SPDBG_FUNC( "CFrontend::StartParenProsody" );

    if( m_fInParenProsody || m_fInQuoteProsody )
    {
        return false;
    }

    m_fInParenProsody = true;
    m_RateRatio_PROSODY = 1.25f;
    m_CurPitchRange = 0.75f;
    m_CurPitchOffs = -0.2f;

    //--- The silence token picks up the settings chosen above
    if( fInsertSil )
    {
        (void)InsertSilenceAtTail( pWordTok, pSentItem, PAREN_HESITATION );
        pWordTok->m_SilenceSource = SIL_ParenStart;
    }
    return true;
} /* CFrontend::StartParenProsody */
/*****************************************************************************
* CFrontend::EndParenProsody *
*----------------------------*
* Description:
* Leave parenthesis prosody and restore neutral pitch offset, pitch
* range and prosody rate ratio. Returns false when parenthesis prosody
* is not active.
* When fInsertSil is set, pWordTok is appended to the token list as a
* PAREN_HESITATION_TAIL silence.
*
********************************************************************** MC ***/
bool CFrontend::EndParenProsody( CFEToken *pWordTok, TTSSentItem *pSentItem, bool fInsertSil )
{
    SPDBG_FUNC( "CFrontend::EndParenProsody" );

    if( !m_fInParenProsody )
    {
        return false;
    }

    m_fInParenProsody = false;
    m_RateRatio_PROSODY = 1.0f;
    m_CurPitchRange = 1.0f;
    m_CurPitchOffs = 0.0f;

    //--- The silence token picks up the settings restored above
    if( fInsertSil )
    {
        (void)InsertSilenceAtTail( pWordTok, pSentItem, PAREN_HESITATION_TAIL );
        // NOTE(review): tagged SIL_ParenStart although this is the END of the
        // parenthesis - possibly a copy/paste slip; confirm against the
        // silence-source enum before changing.
        pWordTok->m_SilenceSource = SIL_ParenStart;
    }
    return true;
} /* CFrontend::EndParenProsody */
/*****************************************************************************
* CFrontend::InsertSilenceAtTail *
*--------------------------------*
* Description:
* Turn 'pWordTok' into a silence token and append it to m_TokList.
* The token takes its source position/length from 'pSentItem' and the
* current pitch offset, pitch range and prosody rate. 'msec' is stored
* as the user break only when it is greater than zero.
* Returns the list position of the appended token.
*
********************************************************************** MC ***/
SPLISTPOS CFrontend::InsertSilenceAtTail( CFEToken *pWordTok, TTSSentItem *pSentItem, long msec )
{
    SPDBG_FUNC( "CFrontend::InsertSilenceAtTail" );

    //--- Optional explicit break length
    if( msec > 0 )
    {
        pWordTok->user_Break = msec;
    }

    //--- A single silence phone, no orthography
    pWordTok->phon_Str[0] = _SIL_;
    pWordTok->phon_Len = 1;
    pWordTok->tokLen = 0;
    pWordTok->tokStr[0] = 0;        // There's no orth for Break

    //--- Source text span comes from the sentence item
    pWordTok->srcLen = pSentItem->ulItemSrcLen;
    pWordTok->srcPosition = pSentItem->ulItemSrcOffset;

    //--- Current prosody state
    pWordTok->m_ProsodyDurScale = m_RateRatio_PROSODY;
    pWordTok->m_PitchRangeScale = m_CurPitchRange;
    pWordTok->m_PitchBaseOffs = m_CurPitchOffs;

    //----------------------------------
    // Append to the token list
    //----------------------------------
    return m_TokList.AddTail( pWordTok );
} /* CFrontend::InsertSilenceAtTail */
/*****************************************************************************
* CFrontend::InsertSilenceAfterPos *
*-----------------------------------*
* Description:
* Turn 'pWordTok' into a silence token and insert it AFTER 'position'
* in m_TokList. The token has no source span, carries "+" as its
* orthography, a duration scale of zero and the current pitch offset,
* pitch range and prosody rate.
* Returns the list position of the inserted token.
*
********************************************************************** MC ***/
SPLISTPOS CFrontend::InsertSilenceAfterPos( CFEToken *pWordTok, SPLISTPOS position )
{
    SPDBG_FUNC( "CFrontend::InsertSilenceAfterPos" );

    //--- A single silence phone
    pWordTok->phon_Str[0] = _SIL_;
    pWordTok->phon_Len = 1;

    //--- No source text behind this token
    pWordTok->srcLen = 0;
    pWordTok->srcPosition = 0;

    //--- Orthography is the one-character string "+"
    pWordTok->tokLen = 1;
    pWordTok->tokStr[0] = '+';      // punctuation
    pWordTok->tokStr[1] = 0;        // delimiter

    //--- Current prosody state
    pWordTok->m_DurScale = 0;
    pWordTok->m_ProsodyDurScale = m_RateRatio_PROSODY;
    pWordTok->m_PitchRangeScale = m_CurPitchRange;
    pWordTok->m_PitchBaseOffs = m_CurPitchOffs;

    //----------------------------------
    // Link it in after 'position'
    //----------------------------------
    return m_TokList.InsertAfter( position, pWordTok );
} /* CFrontend::InsertSilenceAfterPos */
/*****************************************************************************
* CFrontend::InsertSilenceBeforePos *
*------------------------------------*
* Description:
* Turn 'pWordTok' into a silence token and insert it BEFORE 'position'
* in m_TokList. The token has no source span, carries "+" as its
* orthography, a duration scale of zero and the current pitch offset,
* pitch range and prosody rate.
* Returns the list position of the inserted token.
*
********************************************************************** MC ***/
SPLISTPOS CFrontend::InsertSilenceBeforePos( CFEToken *pWordTok, SPLISTPOS position )
{
    SPDBG_FUNC( "CFrontend::InsertSilenceBeforePos" );

    //--- A single silence phone
    pWordTok->phon_Str[0] = _SIL_;
    pWordTok->phon_Len = 1;

    //--- No source text behind this token
    pWordTok->srcLen = 0;
    pWordTok->srcPosition = 0;

    //--- Orthography is the one-character string "+"
    pWordTok->tokLen = 1;
    pWordTok->tokStr[0] = '+';      // punctuation
    pWordTok->tokStr[1] = 0;        // delimiter

    //--- Current prosody state
    pWordTok->m_DurScale = 0;
    pWordTok->m_ProsodyDurScale = m_RateRatio_PROSODY;
    pWordTok->m_PitchRangeScale = m_CurPitchRange;
    pWordTok->m_PitchBaseOffs = m_CurPitchOffs;

    //----------------------------------
    // Link it in before 'position'
    //----------------------------------
    return m_TokList.InsertBefore( position, pWordTok );
} /* CFrontend::InsertSilenceBeforePos */
// Accent settings used by the prosody template functions below.
// K_ACCENT_PROM draws a new random prominence in the range 4..7 each time it is expanded.
#define K_ACCENT_PROM ((rand() % 4) + 4)
#define K_DEACCENT_PROM 5
#define K_ACCENT K_HSTAR
#define K_DEACCENT K_NOACC
/*****************************************************************************
* CFrontend::ProsodyTemplates *
*-----------------------------*
* Description:
* Call prosody template function for supported item types.
* 'clusterPos' is the token-list position of the item's first word;
* the number templates advance it as they consume words.
* Item types not listed in the switch are left untouched.
*
********************************************************************** MC ***/
void CFrontend::ProsodyTemplates( SPLISTPOS clusterPos, TTSSentItem *pSentItem )
{
    SPDBG_FUNC( "CFrontend::ProsodyTemplates" );

    switch( pSentItem->pItemInfo->Type )
    {
        //---------------------------------------
        // Roman numerals (skipped for years)
        //---------------------------------------
        case eNUM_ROMAN_NUMERAL:
        case eNUM_ROMAN_NUMERAL_ORDINAL:
        {
            TTSRomanNumeralItemInfo *pRomanInfo = (TTSRomanNumeralItemInfo*) pSentItem->pItemInfo;
            if ( pRomanInfo->pNumberInfo->Type != eDATE_YEAR )
            {
                TTSNumberItemInfo *pRomanNumber = (TTSNumberItemInfo*) pRomanInfo->pNumberInfo;
                if ( pRomanNumber->pIntegerPart )
                {
                    DoIntegerTemplate( &clusterPos, pRomanNumber, pSentItem->ulNumWords );
                }
                if ( pRomanNumber->pDecimalPart )
                {
                    DoNumByNumTemplate( &clusterPos, pRomanNumber->pDecimalPart->ulNumDigits );
                }
            }
        }
        break;

        //---------------------------------------
        // Numbers
        //---------------------------------------
        case eNUM_CARDINAL:
        case eNUM_DECIMAL:
        case eNUM_ORDINAL:
        case eNUM_MIXEDFRACTION:
        {
            TTSNumberItemInfo *pNumber = (TTSNumberItemInfo*) pSentItem->pItemInfo;

            if ( pNumber->pIntegerPart )
            {
                DoIntegerTemplate( &clusterPos, pNumber, pSentItem->ulNumWords );
            }
            if ( pNumber->pDecimalPart )
            {
                //-----------------------------------------
                // Skip "point" string...
                //-----------------------------------------
                (void) m_TokList.GetNext( clusterPos );
                //-----------------------------------------
                // ...and do single digit prosody
                //-----------------------------------------
                DoNumByNumTemplate( &clusterPos, pNumber->pDecimalPart->ulNumDigits );
            }
            if ( pNumber->pFractionalPart )
            {
                //-----------------------------------------
                // Skip "and" string...
                //-----------------------------------------
                CFEToken *pAndTok = m_TokList.GetNext( clusterPos );
                if( pAndTok->m_Accent == K_NOACC )
                {
                    //--------------------------------------
                    // Force POS for "and" to noun
                    // so phrasing rules don't kick in!
                    //--------------------------------------
                    pAndTok->m_Accent = K_DEACCENT;
                    pAndTok->m_Accent_Prom = K_DEACCENT_PROM;
                    pAndTok->POScode = MS_Noun;
                    pAndTok->m_posClass = POS_CONTENT;
                }
                //-----------------------------------------
                // ...and do fraction prosody
                //-----------------------------------------
                DoFractionTemplate( &clusterPos, pNumber, pSentItem->ulNumWords );
            }
        }
        break;

        //---------------------------------------
        // Fraction
        //---------------------------------------
        case eNUM_FRACTION:
        {
            DoFractionTemplate( &clusterPos,
                                (TTSNumberItemInfo*) pSentItem->pItemInfo,
                                pSentItem->ulNumWords );
        }
        break;

        //---------------------------------------
        // Money
        //---------------------------------------
        case eNUM_CURRENCY:
        {
            DoCurrencyTemplate( clusterPos, pSentItem );
        }
        break;

        //---------------------------------------
        // Phone Numbers
        //---------------------------------------
        case eNUM_PHONENUMBER:
        case eNEWNUM_PHONENUMBER:
        {
            DoPhoneNumberTemplate( clusterPos, pSentItem );
        }
        break;

        //---------------------------------------
        // Time-of-Day
        //---------------------------------------
        case eTIMEOFDAY:
        {
            DoTODTemplate( clusterPos, pSentItem );
        }
        break;

        //---------------------------------------
        // Ellipsis: append a silence token that
        // carries an ellipsis boundary
        //---------------------------------------
        case eELLIPSIS:
        {
            CFEToken *pSilTok = new CFEToken;
            if( pSilTok )
            {
                clusterPos = InsertSilenceAtTail( pSilTok, pSentItem, 0 );
                pSilTok->m_SilenceSource = SIL_Ellipsis;
                pSilTok->m_TuneBoundaryType = ELLIPSIS_BOUNDARY;
                pSilTok->m_BoundarySource = BND_Ellipsis;
            }
        }
        break;
    }
} /* CFrontend::ProsodyTemplates */
/*****************************************************************************
* CFrontend::DoTODTemplate *
*--------------------------*
* Description:
* Prosody template for time-of-day.
* For 12-hour times only: accents the hour token, inserts a silence
* (NUMBER_BOUNDARY) after it, accents the first minutes token when
* 'fMinutes' is set and, when 'fTimeAbbreviation' is set, inserts a
* silence (TOD_BOUNDARY) ahead of the token preceding the list tail and
* accents the tail token.
*
* clusterPos - list position of the first token of the time item
* pSentItem - sentence item; its item info is read as
* TTSTimeOfDayItemInfo
*
* TODO: Temp kludge - needs more info in TTSTimeOfDayItemInfo
********************************************************************** MC ***/
void CFrontend::DoTODTemplate( SPLISTPOS clusterPos, TTSSentItem *pSentItem )
{
    SPDBG_FUNC( "CFrontend::DoTODTemplate" );
    TTSTimeOfDayItemInfo    *pTOD;
    CFEToken                *pWordTok;
    CFEToken                *pClusterTok;
    SPLISTPOS               nextPos, tailPos, prevPos;

    pTOD = (TTSTimeOfDayItemInfo*)&pSentItem->pItemInfo->Type;

    // Can't do 24 hr because there's no way to tell
    // if it's 1 or 2 digits (18: vs 23:)
    // Also bail out when there is no cluster to walk: GetNext() must
    // never be handed a NULL position.
    if( pTOD->fTwentyFourHour || (clusterPos == NULL) )
    {
        return;
    }

    //-------------------------------------
    // Get HOUR token
    //-------------------------------------
    nextPos = clusterPos;
    pClusterTok = m_TokList.GetNext( nextPos );
    //-------------------------------------
    // Accent hour
    //-------------------------------------
    pClusterTok->m_Accent = K_ACCENT;
    pClusterTok->m_Accent_Prom = K_ACCENT_PROM;
    pClusterTok->m_AccentSource = ACC_TimeOFDay_HR;

    //---------------------------------
    // Insert SILENCE after hour
    //---------------------------------
    pWordTok = new CFEToken;
    if( pWordTok )
    {
        nextPos = InsertSilenceAfterPos( pWordTok, clusterPos );
        pWordTok->m_SilenceSource = SIL_TimeOfDay_HR;
        pWordTok->m_TuneBoundaryType = NUMBER_BOUNDARY;
        pWordTok->m_BoundarySource = BND_TimeOFDay_HR;
        pWordTok = NULL;        // token now belongs to the list
        //----------------------------
        // Step over the position returned by the insert.
        // The guard must test 'nextPos' (the position about to be
        // dereferenced), not 'clusterPos' which is already known valid.
        //----------------------------
        if( nextPos != NULL )
        {
            pClusterTok = m_TokList.GetNext( nextPos );
        }
    }

    if( pTOD->fMinutes && (nextPos != NULL) )
    {
        pClusterTok = m_TokList.GetNext( nextPos );
        //-------------------------------------
        // Accent 1st digit for minutes
        //-------------------------------------
        pClusterTok->m_Accent = K_ACCENT;
        pClusterTok->m_Accent_Prom = K_ACCENT_PROM;
        pClusterTok->m_AccentSource = ACC_TimeOFDay_1stMin;
    }

    if( pTOD->fTimeAbbreviation )
    {
        //-------------------------------------
        // Work backwards from the end of the token list:
        // 'tailPos' stays on the last token, 'prevPos' moves to
        // the token before it.
        //-------------------------------------
        tailPos = prevPos = m_TokList.GetTailPosition( );
        pClusterTok = m_TokList.GetPrev( prevPos );
        pWordTok = new CFEToken;
        if( pWordTok )
        {
            prevPos = InsertSilenceBeforePos( pWordTok, prevPos );
            pWordTok->m_SilenceSource = SIL_TimeOfDay_AB;
            pWordTok->m_TuneBoundaryType = TOD_BOUNDARY;
            pWordTok->m_BoundarySource = BND_TimeOFDay_AB;
            pWordTok = NULL;    // token now belongs to the list
        }
        //-------------------------------------
        // Accent "M" (the tail token)
        //-------------------------------------
        pClusterTok = m_TokList.GetNext( tailPos );
        pClusterTok->m_Accent = K_ACCENT;
        pClusterTok->m_Accent_Prom = K_ACCENT_PROM;
        pClusterTok->m_AccentSource = ACC_TimeOFDay_M;
    }
} /* CFrontend::DoTODTemplate */
/*****************************************************************************
* CFrontend::InsertPhoneSilenceAtSpace *
*--------------------------------------*
* Description:
* Insert a PHONE_BOUNDARY silence token before '*pClusterPos', then
* advance past the position returned by the insert and past any
* following tokens whose first phone is _SIL_.
*
* pClusterPos - in: position to insert before
* out: position of the last token fetched (unchanged when
* nothing could be fetched)
* bndSrc - boundary source recorded on the new silence token
* silSrc - silence source recorded on the new silence token
*
* Returns the last token fetched, or NULL when the silence token could
* not be allocated or the insert returned no position.
*****************************************************************************/
CFEToken *CFrontend::InsertPhoneSilenceAtSpace( SPLISTPOS *pClusterPos,
                                                BOUNDARY_SOURCE bndSrc,
                                                SILENCE_SOURCE silSrc )
{
    CFEToken    *pWordTok;
    SPLISTPOS   curPos, nextPos;

    curPos = nextPos = *pClusterPos;
    //---------------------------------
    // Insert SILENCE at this position
    //---------------------------------
    pWordTok = new CFEToken;
    if( pWordTok )
    {
        nextPos = InsertSilenceBeforePos( pWordTok, curPos );
        pWordTok->m_SilenceSource = silSrc;
        pWordTok->m_TuneBoundaryType = PHONE_BOUNDARY;
        pWordTok->m_BoundarySource = bndSrc;
        pWordTok->m_AccentSource = ACC_PhoneBnd_AREA; // @@@@ ???
        pWordTok = NULL;        // token now belongs to the list
        //----------------------------
        // Fetch the token at the returned position
        //----------------------------
        if( nextPos != NULL )
        {
            curPos = nextPos;
            pWordTok = m_TokList.GetNext( nextPos );
        }
    }
    //-----------------------------------------
    // Filter any embedded silences.
    // 'pWordTok' is NULL when the allocation failed or the insert
    // returned no position, so it must be tested before it is
    // dereferenced.
    //-----------------------------------------
    while( (pWordTok != NULL) && (pWordTok->phon_Str[0] == _SIL_) && (nextPos != NULL) )
    {
        curPos = nextPos;
        pWordTok = m_TokList.GetNext( nextPos );
    }
    *pClusterPos = curPos;
    return pWordTok;
}
/*****************************************************************************
* CFrontend::InsertPhoneSilenceAtEnd *
*------------------------------------*
* Description:
* Insert a PHONE_BOUNDARY silence token after the current tail of the
* token list. Does nothing if the token cannot be allocated.
*
* bndSrc - boundary source recorded on the new silence token
* silSrc - silence source recorded on the new silence token
*****************************************************************************/
void CFrontend::InsertPhoneSilenceAtEnd( BOUNDARY_SOURCE bndSrc,
                                         SILENCE_SOURCE silSrc )
{
    SPLISTPOS   tailPos = m_TokList.GetTailPosition( );
    CFEToken    *pSilTok = new CFEToken;

    if( pSilTok == NULL )
    {
        return;
    }
    //---------------------------------
    // Hook the silence in after the tail; the position the insert
    // returns is not needed here.
    //---------------------------------
    (void) InsertSilenceAfterPos( pSilTok, tailPos );
    pSilTok->m_SilenceSource = silSrc;
    pSilTok->m_TuneBoundaryType = PHONE_BOUNDARY;
    pSilTok->m_BoundarySource = bndSrc;
    pSilTok->m_AccentSource = ACC_PhoneBnd_AREA; // @@@@ ???
}
/*****************************************************************************
* CFrontend::DoPhoneNumberTemplate *
*----------------------------------*
* Description:
* Prosody template for phone numbers.
* Walks the item's tokens section by section (country code, "one",
* area code, digit groups), applying number prosody to each section and
* inserting a phone-boundary silence between sections and at the end.
*
* clusterPos - list position of the first token of the phone number
* pSentItem - sentence item; its item info is read as
* TTSPhoneNumberItemInfo
*
* Every step that fetches a token first checks that a position remains,
* so a token list shorter than the item info implies is skipped over
* instead of dereferencing a NULL position.
********************************************************************** MC ***/
void CFrontend::DoPhoneNumberTemplate( SPLISTPOS clusterPos, TTSSentItem *pSentItem )
{
    SPDBG_FUNC( "CFrontend::DoPhoneNumberTemplate" );
    TTSPhoneNumberItemInfo  *pFone;
    SPLISTPOS               nextPos;
    unsigned long           i;

    nextPos = clusterPos;
    pFone = (TTSPhoneNumberItemInfo*)&pSentItem->pItemInfo->Type;

    //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
    //
    // COUNTRY CODE
    //
    //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
    if( pFone->pCountryCode )
    {
        //-------------------------------------
        // Skip "country" and...
        //-------------------------------------
        if( nextPos != NULL )
        {
            (void) m_TokList.GetNext( nextPos );
        }
        //-------------------------------------
        // ...skip "code"
        //-------------------------------------
        if( nextPos != NULL )
        {
            (void) m_TokList.GetNext( nextPos );
        }
        if( nextPos != NULL )
        {
            (void) DoIntegerTemplate( &nextPos,
                                      pFone->pCountryCode,
                                      pSentItem->ulNumWords );
        }
        if( nextPos != NULL )
        {
            (void) InsertPhoneSilenceAtSpace( &nextPos, BND_Phone_COUNTRY, SIL_Phone_COUNTRY );
        }
    }

    //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
    //
    // "One"
    //
    //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
    if( pFone->fOne )
    {
        //-------------------------------------
        // Skip "One"
        //-------------------------------------
        if( nextPos != NULL )
        {
            (void) m_TokList.GetNext( nextPos );
        }
        //-------------------------------------
        // and add silence
        //-------------------------------------
        if( nextPos != NULL )
        {
            (void) InsertPhoneSilenceAtSpace( &nextPos, BND_Phone_ONE, SIL_Phone_ONE );
        }
    }

    //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
    //
    // AREA CODE
    //
    //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
    if( pFone->pAreaCode )
    {
        if( (pFone->fIs800) && nextPos )
        {
            //--------------------------
            // Skip digit
            //--------------------------
            (void) m_TokList.GetNext( nextPos );
            //--------------------------
            // Skip "hundred"
            //--------------------------
            if( nextPos != NULL )
            {
                (void) m_TokList.GetNext( nextPos );
            }
            if( nextPos )
            {
                (void) InsertPhoneSilenceAtSpace( &nextPos, BND_Phone_AREA, SIL_Phone_AREA );
            }
        }
        else
        {
            //-------------------------------------
            // Skip "area" and...
            //-------------------------------------
            if( nextPos != NULL )
            {
                (void) m_TokList.GetNext( nextPos );
            }
            //-------------------------------------
            // ...skip "code"
            //-------------------------------------
            if( nextPos != NULL )
            {
                (void) m_TokList.GetNext( nextPos );
            }
            DoNumByNumTemplate( &nextPos, pFone->pAreaCode->ulNumDigits );
            if( nextPos )
            {
                (void) InsertPhoneSilenceAtSpace( &nextPos, BND_Phone_AREA, SIL_Phone_AREA );
            }
        }
    }

    //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
    //
    // Digits
    //
    //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
    for( i = 0; i < pFone->ulNumGroups; i++ )
    {
        DoNumByNumTemplate( &nextPos, pFone->ppGroups[i]->ulNumDigits );
        if( nextPos )
        {
            (void) InsertPhoneSilenceAtSpace( &nextPos, BND_Phone_DIGITS, SIL_Phone_DIGITS );
        }
    }
    InsertPhoneSilenceAtEnd( BND_Phone_DIGITS, SIL_Phone_DIGITS );
} /* CFrontend::DoPhoneNumberTemplate */
/*****************************************************************************
* CFrontend::DoCurrencyTemplate *
*-------------------------------*
* Description:
* Prosody template for currency.
* Only handles amounts whose primary number part is a cardinal. Applies
* integer prosody to the primary part, inserts a NUMBER_BOUNDARY silence
* after it and, when a secondary part exists, applies integer prosody to
* that part as well.
*
* clusterPos - list position of the first token of the currency item
* pSentItem - sentence item; its item info is read as TTSCurrencyItemInfo
********************************************************************** MC ***/
void CFrontend::DoCurrencyTemplate( SPLISTPOS clusterPos, TTSSentItem *pSentItem )
{
    SPDBG_FUNC( "CFrontend::DoCurrencyTemplate" );
    TTSCurrencyItemInfo *pCash = (TTSCurrencyItemInfo*)&pSentItem->pItemInfo->Type;
    CFEToken            *pSilTok;
    CFEToken            *pTok = NULL;
    long                cWordsLeft;
    SPLISTPOS           herePos, aheadPos;

    herePos = aheadPos = clusterPos;

    //--- Nothing to do unless the primary amount is a plain cardinal
    if( pCash->pPrimaryNumberPart->Type != eNUM_CARDINAL )
    {
        return;
    }

    cWordsLeft = DoIntegerTemplate( &aheadPos,
                                    pCash->pPrimaryNumberPart,
                                    pSentItem->ulNumWords );
    herePos = aheadPos;

    //--- Step over the quantifier word, if the item has one
    if( (cWordsLeft > 1) && pCash->fQuantifier )
    {
        if( aheadPos != NULL )
        {
            herePos = aheadPos;
            pTok = m_TokList.GetNext( aheadPos );
        }
        cWordsLeft--;
    }

    //--- No words left beyond the primary amount
    if( cWordsLeft <= 1 )
    {
        return;
    }

    //---------------------------------
    // Insert SILENCE after "dollars"
    //---------------------------------
    pSilTok = new CFEToken;
    if( pSilTok )
    {
        aheadPos = InsertSilenceAfterPos( pSilTok, herePos );
        pSilTok->m_SilenceSource = SIL_Currency_DOLLAR;
        pSilTok->m_TuneBoundaryType = NUMBER_BOUNDARY;
        pSilTok->m_BoundarySource = BND_Currency_DOLLAR;
        //----------------------------
        // Skip "dollar(s)"
        //----------------------------
        if( aheadPos != NULL )
        {
            herePos = aheadPos;
            pTok = m_TokList.GetNext( aheadPos );
        }
    }

    if( pCash->pSecondaryNumberPart == NULL )
    {
        return;
    }

    //----------------------------
    // Skip SILENCE
    //----------------------------
    if( aheadPos != NULL )
    {
        herePos = aheadPos;
        pTok = m_TokList.GetNext( aheadPos );
    }
    cWordsLeft--;

    //----------------------------
    // Skip AND
    //----------------------------
    if( aheadPos != NULL )
    {
        herePos = aheadPos;
        if( pTok->m_Accent == K_NOACC )
        {
            //--------------------------------------
            // Force POS for "and" to noun
            // so phrasing rules don't kick in!
            //--------------------------------------
            pTok->m_Accent = K_DEACCENT;
            pTok->m_Accent_Prom = K_DEACCENT_PROM;
            pTok->POScode = MS_Noun;
            pTok->m_posClass = POS_CONTENT;
        }
        pTok = m_TokList.GetNext( aheadPos );
    }
    cWordsLeft--;

    //--- Secondary amount; the remaining word count is not needed afterwards
    (void) DoIntegerTemplate( &herePos,
                              pCash->pSecondaryNumberPart,
                              cWordsLeft );
} /* CFrontend::DoCurrencyTemplate */
/*****************************************************************************
* CFrontend::DoNumByNumTemplate *
*---------------------------------*
* Description:
* Prosody template for digit-by-digit numbers (e.g. the RIGHT hand side
* of the decimal point). Walks 'cWordCount' tokens and accents the first
* token of every pair.
*
* pClusterPos - in: position of the first digit token
* out: position following the last token consumed
* cWordCount - number of digit tokens to walk
********************************************************************** MC ***/
void CFrontend::DoNumByNumTemplate( SPLISTPOS *pClusterPos, long cWordCount )
{
    SPDBG_FUNC( "CFrontend::DoNumByNumTemplate" );
    SPLISTPOS   walkPos = *pClusterPos;

    //-------------------------------------------------------------
    // Consume the digits two at a time - add H* to every other word
    //-------------------------------------------------------------
    for( ; cWordCount > 1; cWordCount -= 2 )
    {
        if( walkPos != NULL )
        {
            CFEToken *pAccented = m_TokList.GetNext( walkPos );
            if( pAccented )
            {
                pAccented->m_Accent = K_ACCENT;
                pAccented->m_Accent_Prom = K_ACCENT_PROM;
                pAccented->m_AccentSource = ACC_NumByNum;
            }
        }
        //--- Second digit of the pair is stepped over untouched
        if( walkPos != NULL )
        {
            (void) m_TokList.GetNext( walkPos );
        }
    }

    //--- Odd count: one trailing digit left to step over
    if( (cWordCount > 0) && (walkPos != NULL) )
    {
        (void) m_TokList.GetNext( walkPos );
    }

    *pClusterPos = walkPos;
} /* CFrontend::DoNumByNumTemplate */
/*****************************************************************************
* CFrontend::DoFractionTemplate *
*------------------------------*
* Description:
* Prosody template for fractions.
* Applies number prosody to the numerator, inserts a NUMBER_BOUNDARY
* silence, then applies number prosody to the denominator.
*
* pClusterPos - in/out: current position in the token list; advanced as
* tokens are consumed
* pNInfo - number info whose 'pFractionalPart' describes the
* fraction
* cWordCount - number of words remaining in the item
*
* Returns the word count as updated by the DoIntegerTemplate() calls.
********************************************************************** MC ***/
long CFrontend::DoFractionTemplate( SPLISTPOS *pClusterPos, TTSNumberItemInfo *pNInfo, long cWordCount )
{
    SPDBG_FUNC( "CFrontend::DoFractionTemplate" );
    CFEToken            *pClusterTok;
    TTSFractionItemInfo *pFInfo;
    CFEToken            *pWordTok;

    pFInfo = pNInfo->pFractionalPart;

    //--- Do Numerator...
    if ( pFInfo->pNumerator->pIntegerPart )
    {
        cWordCount = DoIntegerTemplate( pClusterPos, pFInfo->pNumerator, cWordCount );
    }
    if( pFInfo->pNumerator->pDecimalPart )
    {
        //-----------------------------------------
        // Skip "point" string...
        // (only if a token is left to skip)
        //-----------------------------------------
        if( *pClusterPos != NULL )
        {
            (void) m_TokList.GetNext( *pClusterPos );
        }
        //-----------------------------------------
        // ...and do single digit prosody
        //-----------------------------------------
        DoNumByNumTemplate( pClusterPos, pFInfo->pNumerator->pDecimalPart->ulNumDigits );
    }

    //--- Special case - a non-standard fraction (e.g. 1/4)
    if( !pFInfo->fIsStandard )
    {
        if( !*pClusterPos )
        {
            *pClusterPos = m_TokList.GetTailPosition( );
        }
        else
        {
            pClusterTok = m_TokList.GetPrev( *pClusterPos );
        }
    }

    //--- Silence between numerator and denominator
    pWordTok = new CFEToken;
    if( pWordTok )
    {
        *pClusterPos = InsertSilenceBeforePos( pWordTok, *pClusterPos );
        pWordTok->m_SilenceSource = SIL_Fractions_NUM;
        pWordTok->m_TuneBoundaryType = NUMBER_BOUNDARY;
        pWordTok->m_BoundarySource = BND_Frac_Num;
        pWordTok = NULL;        // token now belongs to the list
        //----------------------------
        // Step over the position returned by the insert
        //----------------------------
        if( *pClusterPos != NULL )
        {
            pClusterTok = m_TokList.GetNext( *pClusterPos );
        }
    }

    //--- Do Denominator...
    if ( pFInfo->pDenominator->pIntegerPart )
    {
        //-----------------------------------------
        // Skip "over" string...
        // (only if a token is left to skip)
        //-----------------------------------------
        if( *pClusterPos != NULL )
        {
            pClusterTok = m_TokList.GetNext( *pClusterPos );
            if( pClusterTok->m_Accent == K_NOACC )
            {
                //--------------------------------------
                // Force POS for "over" to noun
                // so phrasing rules don't kick in!
                //--------------------------------------
                pClusterTok->m_Accent = K_DEACCENT;
                pClusterTok->m_Accent_Prom = K_DEACCENT_PROM;
                pClusterTok->POScode = MS_Noun;
                pClusterTok->m_posClass = POS_CONTENT;
            }
        }
        //--- Nothing left to template if the list ran out
        if( *pClusterPos != NULL )
        {
            cWordCount = DoIntegerTemplate( pClusterPos, pFInfo->pDenominator, cWordCount );
        }
    }
    if( pFInfo->pDenominator->pDecimalPart )
    {
        //-----------------------------------------
        // Skip "point" string...
        // (only if a token is left to skip)
        //-----------------------------------------
        if( *pClusterPos != NULL )
        {
            (void) m_TokList.GetNext( *pClusterPos );
        }
        //-----------------------------------------
        // ...and do single digit prosody
        //-----------------------------------------
        DoNumByNumTemplate( pClusterPos, pFInfo->pDenominator->pDecimalPart->ulNumDigits );
    }
    return cWordCount;
} /* CFrontend::DoFractionTemplate */
/*****************************************************************************
* CFrontend::DoIntegerTemplate *
*------------------------------*
* Description:
* Prosody template for LEFT hand side of the decimal point.
* Accents the first word of each digit group, de-accents the remaining
* words of the group that have no accent yet, and inserts a
* NUMBER_BOUNDARY silence after each group quantifier.
*
* pClusterPos - in: position of the first token of the integer
* out: position of the last token fetched
* pNInfo - number info; 'pIntegerPart' describes the groups
* cWordCount - number of words remaining in the item
*
* Returns 'cWordCount' less the number of words accounted for here.
* If '*pClusterPos' is NULL (and the number is not digit-by-digit) there
* is no token to work on and 'cWordCount' is returned unchanged.
********************************************************************** MC ***/
long CFrontend::DoIntegerTemplate( SPLISTPOS *pClusterPos, TTSNumberItemInfo *pNInfo, long cWordCount )
{
    SPDBG_FUNC( "CFrontend::DoIntegerTemplate" );
    long        i;
    CFEToken    *pClusterTok;
    CFEToken    *pWordTok = NULL;
    SPLISTPOS   curPos, nextPos;

    //------------------------------------------
    // Special currency hack...sorry
    //------------------------------------------
    if( pNInfo->pIntegerPart->fDigitByDigit )
    {
        DoNumByNumTemplate( pClusterPos, pNInfo->pIntegerPart->ulNumDigits );
        return cWordCount - pNInfo->pIntegerPart->ulNumDigits;
    }

    //------------------------------------------
    // GetNext() must not be handed a NULL position
    //------------------------------------------
    if( *pClusterPos == NULL )
    {
        return cWordCount;
    }

    //------------------------------------------
    // From here on 'pClusterTok' is the token at 'curPos'
    // and 'nextPos' is the position that follows it
    //------------------------------------------
    nextPos = curPos = *pClusterPos;
    pClusterTok = m_TokList.GetNext( nextPos );
    pClusterTok->m_Accent = K_DEACCENT;
    pClusterTok->m_Accent_Prom = K_DEACCENT_PROM;

    if( pNInfo->fNegative )
    {
        //---------------------------------
        // Skip "NEGATIVE"
        //---------------------------------
        if( nextPos != NULL )
        {
            curPos = nextPos;
            pClusterTok = m_TokList.GetNext( nextPos );
            pClusterTok->m_Accent = K_DEACCENT;
            pClusterTok->m_Accent_Prom = K_DEACCENT_PROM;
        }
        cWordCount--;
    }

    //--- Groups are walked from the highest index down to 0
    for( i = (pNInfo->pIntegerPart->lNumGroups -1); i >= 0; i-- )
    {
        //------------------------------------
        // Accent 1st digit in group
        //------------------------------------
        pClusterTok->m_Accent = K_ACCENT;
        pClusterTok->m_Accent_Prom = K_ACCENT_PROM;
        pClusterTok->m_AccentSource = ACC_IntegerGroup;

        if( pNInfo->pIntegerPart->Groups[i].fHundreds )
        {
            //---------------------------------
            // Skip "X HUNDRED" (two words)
            //---------------------------------
            if( nextPos != NULL )
            {
                curPos = nextPos;
                pClusterTok = m_TokList.GetNext( nextPos );
                if( pClusterTok->m_Accent == K_NOACC )
                {
                    pClusterTok->m_Accent = K_DEACCENT;
                    pClusterTok->m_Accent_Prom = K_DEACCENT_PROM;
                }
            }
            cWordCount--;
            if( nextPos != NULL )
            {
                curPos = nextPos;
                pClusterTok = m_TokList.GetNext( nextPos );
                if( pClusterTok->m_Accent == K_NOACC )
                {
                    pClusterTok->m_Accent = K_DEACCENT;
                    pClusterTok->m_Accent_Prom = K_DEACCENT_PROM;
                }
            }
            cWordCount--;
        }

        if( pNInfo->pIntegerPart->Groups[i].fTens )
        {
            //---------------------------------
            // Skip "X-TY"
            //---------------------------------
            if( nextPos != NULL )
            {
                curPos = nextPos;
                pClusterTok = m_TokList.GetNext( nextPos );
                if( pClusterTok->m_Accent == K_NOACC )
                {
                    pClusterTok->m_Accent = K_DEACCENT;
                    pClusterTok->m_Accent_Prom = K_DEACCENT_PROM;
                }
            }
            cWordCount--;
        }

        if( pNInfo->pIntegerPart->Groups[i].fOnes )
        {
            //---------------------------------
            // Skip "X"
            //---------------------------------
            if( nextPos != NULL )
            {
                curPos = nextPos;
                pClusterTok = m_TokList.GetNext( nextPos );
                if( pClusterTok->m_Accent == K_NOACC )
                {
                    pClusterTok->m_Accent = K_DEACCENT;
                    pClusterTok->m_Accent_Prom = K_DEACCENT_PROM;
                }
            }
            cWordCount--;
        }

        if( pNInfo->pIntegerPart->Groups[i].fQuantifier )
        {
            //---------------------------------
            // Insert SILENCE after quant
            //---------------------------------
            if( pWordTok == NULL )
            {
                pWordTok = new CFEToken;
            }
            if( pWordTok )
            {
                nextPos = InsertSilenceAfterPos( pWordTok, curPos );
                pWordTok->m_SilenceSource = SIL_Integer_Quant;
                pWordTok->m_TuneBoundaryType = NUMBER_BOUNDARY;
                pWordTok->m_BoundarySource = BND_IntegerQuant;
                pWordTok = NULL;    // token now belongs to the list
                if( pClusterTok->m_Accent == K_NOACC )
                {
                    pClusterTok->m_Accent = K_DEACCENT;
                    pClusterTok->m_Accent_Prom = K_DEACCENT_PROM;
                }
                if( nextPos != NULL )
                {
                    //------------------------------
                    // Skip inserted silence
                    //------------------------------
                    curPos = nextPos;
                    pClusterTok = m_TokList.GetNext( nextPos );
                }
                if( nextPos != NULL )
                {
                    //-----------------------------------
                    // Skip quantifier string
                    //-----------------------------------
                    curPos = nextPos;
                    pClusterTok = m_TokList.GetNext( nextPos );
                }
                cWordCount--;
            }
        }
    }
    *pClusterPos = curPos;
    return cWordCount;
} /* CFrontend::DoIntegerTemplate */
/*****************************************************************************
* CFrontend::GetSentenceTokens *
*------------------------------*
* Description:
* Collect Sentence Enum tokens.
* Copy tokens into 'm_TokList' and token count into 'm_cNumOfWords'
* S_FALSE return means no more input sentences.+++
*
* eDirection - eNEXT fetches the next sentence from 'm_pEnumSent';
* any other value fetches the previous one.
*
* Returns:
* S_OK - a sentence was read (S_FALSE from the item enumerator
* at end-of-sentence is mapped to S_OK)
* E_OUTOFMEMORY - a working token could not be allocated
* otherwise - whatever the sentence enumerator returned
********************************************************************** MC ***/
HRESULT CFrontend::GetSentenceTokens( DIRECTION eDirection )
{
    SPDBG_FUNC( "CFrontend::GetSentenceTokens" );
    HRESULT         eHR = S_OK;
    bool            fLastItem = false;
    IEnumSENTITEM   *pItemizer;
    TTSSentItem     sentItem;
    long            tokenIndex;
    CFEToken        *pWordTok;
    bool            lastWasTerm = false;
    bool            lastWasSil = true;
    TUNE_TYPE       defaultTune = PHRASE_BOUNDARY;
    long            cNumOfItems, cCurItem, cCurWord;
    SPLISTPOS       clusterPos, tempPos;

    m_cNumOfWords = 0;
    pWordTok = NULL;
    clusterPos = NULL;

    if ( eDirection == eNEXT )
    {
        eHR = m_pEnumSent->Next( &pItemizer );
    }
    else
    {
        eHR = m_pEnumSent->Previous( &pItemizer );
    }

    //--------------------------------------------
    // Any other result from the sentence enumerator
    // (S_FALSE or a failure) is returned unchanged
    //--------------------------------------------
    if( eHR == S_OK )
    {
        //--------------------------------------------
        // There's still another sentence to speak
        //--------------------------------------------
        tokenIndex = 0;
        CItemList& ItemList = ((CSentItemEnum*)pItemizer)->_GetList();
        cNumOfItems = (ItemList.GetCount()) -1;
        cCurItem = 0;

        //------------------------------------
        // Collect all sentence tokens
        //------------------------------------
        while( (eHR = pItemizer->Next( &sentItem )) == S_OK )
        {
            clusterPos = NULL;
            cCurWord = sentItem.ulNumWords;
            for ( ULONG i = 0; i < sentItem.ulNumWords; i++ )
            {
                //------------------------------
                // Always have a working token
                //------------------------------
                if( pWordTok == NULL )
                {
                    pWordTok = new CFEToken;
                }
                if( pWordTok )
                {
                    if( sentItem.pItemInfo->Type & eWORDLIST_IS_VALID )
                    {
                        //------------------------------------------
                        // Get tag values (vol, rate, pitch, etc.)
                        //------------------------------------------
                        GetItemControls( sentItem.Words[i].pXmlState, pWordTok );

                        //-------------------------------------
                        // Switch on token type
                        //-------------------------------------
                        switch ( sentItem.Words[i].pXmlState->eAction )
                        {
                            case SPVA_Speak:
                            case SPVA_SpellOut:
                            {
                                //----------------------------------
                                // Speak this token
                                //----------------------------------
                                pWordTok->tokLen = sentItem.Words[i].ulWordLen;
                                if( pWordTok->tokLen > (TOKEN_LEN_MAX -1) )
                                {
                                    //-----------------------------------
                                    // Clip to max string length
                                    //-----------------------------------
                                    pWordTok->tokLen = TOKEN_LEN_MAX -1;
                                }
                                //--------------------------
                                // Copy token string
                                // Append C-string delimiter
                                //--------------------------
                                memcpy( &pWordTok->tokStr[0], &sentItem.Words[i].pWordText[0],
                                        pWordTok->tokLen * sizeof(WCHAR) );
                                pWordTok->tokStr[pWordTok->tokLen] = 0; //string delimiter
                                pWordTok->phon_Len = IPA_to_Allo( sentItem.Words[i].pWordPron,
                                                                  pWordTok->phon_Str );
                                pWordTok->POScode = sentItem.Words[i].eWordPartOfSpeech;
                                pWordTok->m_posClass = GetPOSClass( pWordTok->POScode );
                                pWordTok->srcPosition = sentItem.ulItemSrcOffset;
                                pWordTok->srcLen = sentItem.ulItemSrcLen;
                                pWordTok->m_PitchBaseOffs = m_CurPitchOffs;
                                pWordTok->m_PitchRangeScale = m_CurPitchRange;
                                pWordTok->m_ProsodyDurScale = m_RateRatio_PROSODY;
                                //----------------------------------
                                // Advance to next token
                                //----------------------------------
                                tempPos = m_TokList.AddTail( pWordTok );
                                if( clusterPos == NULL )
                                {
                                    //--------------------------------------
                                    // Remember where current item started
                                    //--------------------------------------
                                    clusterPos = tempPos;
                                }
                                pWordTok = NULL; // Get a new ptr next time
                                tokenIndex++;
                                lastWasTerm = false;
                                lastWasSil = false;
                                break;
                            }
                            case SPVA_Silence:
                            {
                                (void)InsertSilenceAtTail( pWordTok, &sentItem, sentItem.Words[i].pXmlState->SilenceMSecs );
                                pWordTok->m_SilenceSource = SIL_XML;
                                pWordTok = NULL; // Get a new ptr next time
                                tokenIndex++;
                                lastWasTerm = false;
                                break;
                            }
                            case SPVA_Pronounce:
                            {
                                pWordTok->tokStr[0] = 0; // There's no orth for Pron types
                                pWordTok->tokLen = 0;
                                pWordTok->phon_Len = IPA_to_Allo( sentItem.Words[i].pXmlState->pPhoneIds, pWordTok->phon_Str );
                                pWordTok->POScode = sentItem.Words[i].eWordPartOfSpeech;
                                pWordTok->m_posClass = GetPOSClass( pWordTok->POScode );
                                pWordTok->srcPosition = sentItem.ulItemSrcOffset;
                                pWordTok->srcLen = sentItem.ulItemSrcLen;
                                pWordTok->m_PitchBaseOffs = m_CurPitchOffs;
                                pWordTok->m_PitchRangeScale = m_CurPitchRange;
                                pWordTok->m_ProsodyDurScale = m_RateRatio_PROSODY;
                                //----------------------------------
                                // Advance to next token
                                //----------------------------------
                                tempPos = m_TokList.AddTail( pWordTok );
                                if( clusterPos == NULL )
                                {
                                    //--------------------------------------
                                    // Remember where current item started
                                    //--------------------------------------
                                    clusterPos = tempPos;
                                }
                                pWordTok = NULL; // Get a new ptr next time
                                tokenIndex++;
                                lastWasTerm = false;
                                lastWasSil = false;
                                break;
                            }
                            case SPVA_Bookmark:
                            {
                                BOOKMARK_ITEM *pMarker;
                                //-------------------------------------------------
                                // Create bookmark list if it's not already there.
                                // The working token is NOT added to the list here,
                                // so the bookmark rides on whichever token is
                                // added next.
                                //-------------------------------------------------
                                if( pWordTok->pBMObj == NULL )
                                {
                                    pWordTok->pBMObj = new CBookmarkList;
                                }
                                if( pWordTok->pBMObj )
                                {
                                    pWordTok->tokLen = sentItem.Words[i].ulWordLen;
                                    pMarker = new BOOKMARK_ITEM;
                                    if (pMarker)
                                    {
                                        //----------------------------------------
                                        // We'll need the text ptr and length
                                        // when this bookmark event gets posted
                                        //----------------------------------------
                                        pMarker->pBMItem = (LPARAM)sentItem.pItemSrcText;
                                        //--- Punch NULL character into end of bookmark string for Event...
                                        WCHAR* pTemp = (WCHAR*) sentItem.pItemSrcText + sentItem.ulItemSrcLen;
                                        *pTemp = 0;
                                        //-----------------------------------
                                        // Add this bookmark to list
                                        //-----------------------------------
                                        pWordTok->pBMObj->m_BMList.AddTail( pMarker );
                                    }
                                }
                                break;
                            }
                            default:
                            {
                                SPDBG_DMSG1( "Unknown SPVSTATE eAction: %d\n", sentItem.Words[i].pXmlState->eAction );
                                break;
                            }
                        }
                    }
                    else
                    {
                        //-----------------------------
                        // Maybe token is punctuation
                        //-----------------------------
                        if ( fIsPunctuation(sentItem) )
                        {
                            TUNE_TYPE bType = NULL_BOUNDARY;
                            switch ( sentItem.pItemInfo->Type )
                            {
                                case eCOMMA:
                                case eSEMICOLON:
                                case eCOLON:
                                case eHYPHEN:
                                    if( !lastWasSil )
                                    {
                                        bType = PHRASE_BOUNDARY;
                                    }
                                    break;
                                case ePERIOD:
                                    if( fLastItem )
                                    {
                                        bType = DECLAR_BOUNDARY;
                                    }
                                    else
                                    {
                                        defaultTune = DECLAR_BOUNDARY;
                                    }
                                    break;
                                case eQUESTION:
                                    if( fLastItem )
                                    {
                                        bType = YN_QUEST_BOUNDARY;
                                    }
                                    else
                                    {
                                        defaultTune = YN_QUEST_BOUNDARY;
                                    }
                                    break;
                                case eEXCLAMATION:
                                    if( fLastItem )
                                    {
                                        bType = EXCLAM_BOUNDARY;
                                    }
                                    else
                                    {
                                        defaultTune = EXCLAM_BOUNDARY;
                                    }
                                    break;
                            }
                            if( (bType != NULL_BOUNDARY) && (tokenIndex > 0) )
                            {
                                pWordTok->m_TuneBoundaryType = bType;
                                pWordTok->phon_Len = 1;
                                pWordTok->phon_Str[0] = _SIL_;
                                pWordTok->srcPosition = sentItem.ulItemSrcOffset;
                                pWordTok->srcLen = sentItem.ulItemSrcLen;
                                pWordTok->tokStr[0] = sentItem.pItemSrcText[0]; // punctuation
                                pWordTok->tokStr[1] = 0; // delimiter
                                pWordTok->tokLen = 1;
                                pWordTok->m_SilenceSource = SIL_Term;
                                pWordTok->m_TermSil = 0;
                                //----------------------------------
                                // Advance to next token
                                //----------------------------------
                                tempPos = m_TokList.AddTail( pWordTok );
                                if( clusterPos == NULL )
                                {
                                    //--------------------------------------
                                    // Remember where current item started
                                    //--------------------------------------
                                    clusterPos = tempPos;
                                }
                                pWordTok = NULL; // Get a new ptr next time
                                tokenIndex++;
                                lastWasTerm = true;
                                lastWasSil = true;
                            }
                        }
                        else
                        {
                            switch ( sentItem.pItemInfo->Type )
                            {
                                //case eSINGLE_QUOTE:
                                case eDOUBLE_QUOTE:
                                    if( StateQuoteProsody( pWordTok, &sentItem, (!fLastItem) & (!lastWasSil) ) )
                                    {
                                        if( (!fLastItem) & (!lastWasSil) )
                                        {
                                            pWordTok = NULL; // Get a new ptr next time
                                            tokenIndex++;
                                        }
                                        lastWasTerm = false;
                                        lastWasSil = true;
                                    }
                                    break;
                                case eOPEN_PARENTHESIS:
                                case eOPEN_BRACKET:
                                case eOPEN_BRACE:
                                    if( StartParenProsody( pWordTok, &sentItem, !fLastItem ) )
                                    {
                                        if( !fLastItem )
                                        {
                                            pWordTok = NULL; // Get a new ptr next time
                                            tokenIndex++;
                                        }
                                        lastWasTerm = false;
                                        lastWasSil = true;
                                    }
                                    break;
                                case eCLOSE_PARENTHESIS:
                                case eCLOSE_BRACKET:
                                case eCLOSE_BRACE:
                                    if( EndParenProsody( pWordTok, &sentItem, !fLastItem ) )
                                    {
                                        if( !fLastItem )
                                        {
                                            pWordTok = NULL; // Get a new ptr next time
                                            tokenIndex++;
                                        }
                                        lastWasTerm = false;
                                        lastWasSil = true;
                                    }
                                    break;
                            }
                        }
                    }
                }
                else
                {
                    eHR = E_OUTOFMEMORY;
                    break;
                }
                if( --cCurWord == 0 )
                {
                    cCurItem++;
                }
                if( cCurItem == cNumOfItems )
                {
                    fLastItem = true;
                }
            }
            //-------------------------------------
            // The 'break' above only leaves the word loop. Stop
            // collecting items too, otherwise the next Next() call
            // overwrites 'eHR' and the out-of-memory error is lost.
            //-------------------------------------
            if( FAILED( eHR ) )
            {
                break;
            }
            //-------------------------------------
            // Tag special word clusters
            //-------------------------------------
            ProsodyTemplates( clusterPos, &sentItem );
        }
        pItemizer->Release();

        //------------------------------------------------------
        // Make sure sentence ends on termination
        //------------------------------------------------------
        if( !lastWasTerm )
        {
            //------------------------
            // Add a terminator token using the default tune
            //------------------------
            if( pWordTok == NULL )
            {
                pWordTok = new CFEToken;
            }
            if( pWordTok )
            {
                pWordTok->m_TuneBoundaryType = defaultTune;
                pWordTok->m_BoundarySource = BND_ForcedTerm;
                pWordTok->m_SilenceSource = SIL_Term;
                pWordTok->phon_Len = 1;
                pWordTok->phon_Str[0] = _SIL_;
                // NOTE(review): 'sentItem' is only filled in by
                // pItemizer->Next(); if the sentence had no items at
                // all these two fields look uninitialized - confirm.
                pWordTok->srcPosition = sentItem.ulItemSrcOffset;
                pWordTok->srcLen = sentItem.ulItemSrcLen;
                pWordTok->tokStr[0] = '.'; // punctuation
                pWordTok->tokStr[1] = 0; // delimiter
                pWordTok->tokLen = 1;
                //----------------------------------
                // Advance to next token
                //----------------------------------
                tempPos = m_TokList.AddTail( pWordTok );
                if( clusterPos == NULL )
                {
                    //--------------------------------------
                    // Remember where current item started
                    //--------------------------------------
                    clusterPos = tempPos;
                }
                pWordTok = NULL; // Get a new ptr next time
                tokenIndex++;
            }
            else
            {
                //----------------------------------
                // Bail-out or we'll crash
                //----------------------------------
                eHR = E_OUTOFMEMORY;
            }
        }
        m_cNumOfWords = tokenIndex;
        if( eHR == S_FALSE )
        {
            //----------------------------------
            // Return only errors
            //----------------------------------
            eHR = S_OK;
        }
    }

    //-------------------------------
    // Cleanup memory allocation
    //-------------------------------
    if( pWordTok != NULL )
    {
        delete pWordTok;
    }

    //---------------------------------------------------
    // Get sentence position and length for SAPI events
    //---------------------------------------------------
    CalcSentenceLength();
    return eHR;
} /* CFrontend::GetSentenceTokens */
/*****************************************************************************
* CFrontend::CalcSentenceLength *
*-------------------------------*
* Description:
* Loop thru token list to find the first and last tokens that have
* source text (srcLen > 0), then store the sentence start and length
* on the first such token:
* sentencePosition = source position of the first token
* sentenceLen = (last position - first position) + last length
* If no token has source text, nothing is written.
*
********************************************************************** MC ***/
void CFrontend::CalcSentenceLength()
{
    long        firstIndex, lastIndex, lastLen;
    bool        firstState;
    SPLISTPOS   listPos;
    CFEToken    *pWordTok, *pFirstTok = NULL;

    //---------------------------------------------
    // Find the 1st and last words in sentence
    //---------------------------------------------
    firstIndex = lastIndex = lastLen = 0;
    firstState = true;
    listPos = m_TokList.GetHeadPosition();
    while( listPos )
    {
        pWordTok = m_TokList.GetNext( listPos );
        //-------------------------------------------
        // Look at displayable words only
        //-------------------------------------------
        if( pWordTok->srcLen > 0 )
        {
            if( firstState )
            {
                firstState = false;
                firstIndex = pWordTok->srcPosition;
                pFirstTok = pWordTok;
            }
            //-------------------------------------------
            // Every displayable word, including the first, is a
            // candidate for "last". Without this a sentence with a
            // single displayable word would leave lastIndex/lastLen at
            // zero and yield a negative sentence length.
            //-------------------------------------------
            lastIndex = pWordTok->srcPosition;
            lastLen = pWordTok->srcLen;
        }
    }

    //--------------------------------------------------
    // Calculate sentence length for head list item
    //--------------------------------------------------
    if( pFirstTok )
    {
        pFirstTok->sentencePosition = firstIndex; // Sentence starts here...
        pFirstTok->sentenceLen = (lastIndex - firstIndex) + lastLen; // ...and this is the length
    }
}
/*****************************************************************************
* CFrontend::DisposeUnits *
*-------------------------*
* Description:
* Release 'm_pUnits' together with the bookmark lists still attached to
* the units from 'm_CurUnitIndex' up to 'm_unitCount'.
* Only compiled when USE_VOICEDATAOBJ is defined.
*
********************************************************************** MC ***/
#ifdef USE_VOICEDATAOBJ
void CFrontend::DisposeUnits( )
{
    SPDBG_FUNC( "CFrontend::DisposeUnits" );

    //--- Nothing allocated, nothing to free
    if( m_pUnits == NULL )
    {
        return;
    }

    //-----------------------------------------
    // Free bookmark lists of the remaining units
    //-----------------------------------------
    for( ULONG idx = m_CurUnitIndex; idx < m_unitCount; ++idx )
    {
        if( m_pUnits[idx].pBMObj == NULL )
        {
            continue;
        }
        delete m_pUnits[idx].pBMObj;
        m_pUnits[idx].pBMObj = NULL;
    }

    // NOTE(review): 'm_pUnits' is indexed like an array above; if it is
    // allocated with new[], this should be delete[] - confirm against
    // the allocation site.
    delete m_pUnits;
    m_pUnits = NULL;
} /* CFrontend::DisposeUnits */
#endif
/*****************************************************************************
* CFrontend::ParseSentence *
*------------------------------*
* Description:
* Fill 'm_pUnits' array with next sentence.
* If there's no more input text,
* return with 'm_SpeechState' set to SPEECH_DONE +++
*
********************************************************************** MC ***/
HRESULT CFrontend::ParseSentence( DIRECTION eDirection )
{
SPDBG_FUNC( "CFrontend::ParseNextSentence" );
HRESULT hr = S_OK;
//-----------------------------------------------------
// If there's a previous unit array, free its memory
//-----------------------------------------------------
#ifdef USE_VOICEDATAOBJ
DisposeUnits();
#endif
m_CurUnitIndex = 0;
m_unitCount = 0;
DeleteTokenList();
#ifdef USE_VOICEDATAOBJ
m_pUnits = NULL;
#endif
//-----------------------------------------------------
// If there's a previous allo array, free its memory
//-----------------------------------------------------
if( m_pAllos )
{
delete m_pAllos;
m_pAllos = NULL;
}
//-----------------------------------------------------
// Fill token array with next sentence
// Skip empty sentences.
// NOTE: includes non-speaking items
//-----------------------------------------------------
do
{
hr = GetSentenceTokens( eDirection );
} while( (hr == S_OK) && (m_cNumOfWords == 0) );
if( hr == S_OK )
{
//--------------------------------------------
// Prepare word emphasis
//--------------------------------------------
DoWordAccent();
//--------------------------------------------
// Word level prosodic lables
//--------------------------------------------
DoPhrasing();
ToBISymbols();
//--------------------------------------------
// Convert tokens to allo list
//--------------------------------------------
m_pAllos = new CAlloList;
if (m_pAllos == NULL)
{
//-----------------------
// Out of memory
//-----------------------
hr = E_FAIL;
}
if( SUCCEEDED(hr) )
{
//--------------------------------
// Convert word to allo strteam
//-------------------------------
TokensToAllo( &m_TokList, m_pAllos );
//----------------------------
// Tag sentence syllables
//----------------------------
m_SyllObj.TagSyllables( m_pAllos );
//--------------------------------------------
// Dispose token array, no longer needed
//--------------------------------------------
DeleteTokenList();
//--------------------------------------------
// Create the unit array
// NOTE:
//--------------------------------------------
#ifdef USE_VOICEDATAOBJ
hr = UnitLookahead ();
if( hr == S_OK )
{
//--------------------------------------------
// Compute allo durations
//--------------------------------------------
UnitToAlloDur( m_pAllos, m_pUnits );
m_DurObj.AlloDuration( m_pAllos, m_RateRatio_API );
//--------------------------------------------
// Modulate allo pitch
//--------------------------------------------
m_PitchObj.AlloPitch( m_pAllos, m_BasePitch, m_PitchRange );
}
#else
m_DurObj.AlloDuration( m_pAllos, m_RateRatio_API );
m_PitchObj.AlloPitch( m_pAllos, m_BasePitch, m_PitchRange );
#endif
}
#ifdef USE_VOICEDATAOBJ
if( hr == S_OK )
{
AlloToUnitPitch( m_pAllos, m_pUnits );
}
#endif
}
if( FAILED(hr) )
{
//------------------------------------------
// Either the input text is dry or we failed.
// Try to fail gracefully
// 1 - Clean up memory
// 2 - End the speech
//------------------------------------------
if( m_pAllos )
{
delete m_pAllos;
m_pAllos = 0;
}
DeleteTokenList();
#ifdef USE_VOICEDATAOBJ
DisposeUnits();
#endif
m_SpeechState = SPEECH_DONE;
}
else if( hr == S_FALSE )
{
//---------------------------------
// No more input text
//---------------------------------
hr = S_OK;
m_SpeechState = SPEECH_DONE;
}
return hr;
} /* CFrontend::ParseNextSentence */
/*****************************************************************************
* CFrontend::UnitLookahead *
*--------------------------*
*   Description:
*   Allocates 'm_pUnits' with one entry per allo in 'm_pAllos', fills it
*   through AlloToUnit(), asks the voice data object for the unit IDs and
*   copies the returned fields back into 'm_pUnits'.
*
*   Returns E_FAIL when an allocation fails, otherwise the HRESULT of
*   AlloToUnit() / GetUnitIDs(). On any failure 'm_pUnits' is freed and
*   left NULL.
*
********************************************************************** MC ***/
#ifdef USE_VOICEDATAOBJ
HRESULT CFrontend::UnitLookahead ()
{
    SPDBG_FUNC( "CFrontend::UnitLookahead" );
    HRESULT     hr = S_OK;
    UNIT_CVT    *pPhon2Unit = NULL;
    ULONG       i;

    //--------------------------------------------
    // Allocate the unit array and the scratch
    // conversion array
    //--------------------------------------------
    m_unitCount = m_pAllos->GetCount();
    m_pUnits = new UNITINFO[m_unitCount];
    if( m_pUnits )
    {
        pPhon2Unit = new UNIT_CVT[m_unitCount];
    }
    if( (m_pUnits == NULL) || (pPhon2Unit == NULL) )
    {
        //--- Out of memory
        hr = E_FAIL;
    }

    //--------------------------------------------
    // Convert allo list to unit array
    //--------------------------------------------
    if( SUCCEEDED(hr) )
    {
        memset( m_pUnits, 0, m_unitCount * sizeof(UNITINFO) );
        hr = AlloToUnit( m_pAllos, m_pUnits );
    }

    //--------------------------------------------
    // Initialize UNIT_CVT and compute triphone IDs
    //--------------------------------------------
    if( SUCCEEDED(hr) )
    {
        for( i = 0; i < m_unitCount; i++ )
        {
            pPhon2Unit[i].PhonID    = m_pUnits[i].PhonID;
            pPhon2Unit[i].flags     = m_pUnits[i].flags;
        }
        hr = m_pVoiceDataObj->GetUnitIDs( pPhon2Unit, m_unitCount );
    }

    if( SUCCEEDED(hr) )
    {
        //--------------------------------------------
        // Copy UNIT_CVT to UNITINFO
        //--------------------------------------------
        for( i = 0; i < m_unitCount; i++ )
        {
            m_pUnits[i].UnitID      = pPhon2Unit[i].UnitID;
            m_pUnits[i].SenoneID    = pPhon2Unit[i].SenoneID;
            m_pUnits[i].duration    = pPhon2Unit[i].Dur;
            m_pUnits[i].amp         = pPhon2Unit[i].Amp;
            m_pUnits[i].ampRatio    = pPhon2Unit[i].AmpRatio;
            strcpy( m_pUnits[i].szUnitName, pPhon2Unit[i].szUnitName );
        }
    }
    else
    {
        //--------------------------------------------
        // Allocation, allo conversion or unit lookup
        // failed: drop the unit array.
        // Both arrays come from new[], so they must be
        // released with delete[] (NULL is a no-op).
        //--------------------------------------------
        delete [] m_pUnits;
        m_pUnits = NULL;
    }

    //------------------------------
    // Cleanup before exit
    //------------------------------
    delete [] pPhon2Unit;
    return hr;
} /* CFrontend::UnitLookahead */
#endif
/*****************************************************************************
* CFrontend::UnitToAlloDur *
*--------------------------*
*   Description:
*   Walks the allo list and the unit array in step, storing each unit's
*   'duration' in the matching allo cell's 'm_UnitDur'.
*   'pu' must hold at least one entry per cell in 'pAllos'.
*
********************************************************************** MC ***/
void CFrontend::UnitToAlloDur( CAlloList *pAllos, UNITINFO *pu )
{
    SPDBG_FUNC( "CFrontend::UnitToAlloDur" );
    UNITINFO    *pUnit = pu;
    CAlloCell   *pCell;

    for( pCell = pAllos->GetHeadCell(); pCell != NULL; pCell = pAllos->GetNextCell() )
    {
        pCell->m_UnitDur = pUnit->duration;
        ++pUnit;
    }
} /* CFrontend::UnitToAlloDur */
/*****************************************************************************
* CFrontend::AlloToUnitPitch *
*----------------------------*
*   Description:
*   Walks the allo list and the unit array in step. For every pair it
*   copies the allo duration to the unit and, for each of the unit's
*   'nKnots' knots, stores the allo knot time scaled by 'm_SampleRate',
*   the allo knot pitch, and the unit's own 'ampRatio' as knot amplitude.
*
********************************************************************** MC ***/
#ifdef USE_VOICEDATAOBJ
void CFrontend::AlloToUnitPitch( CAlloList *pAllos, UNITINFO *pu )
{
    SPDBG_FUNC( "CFrontend::AlloToUnitPitch" );
    UNITINFO    *pUnit = pu;
    CAlloCell   *pCell;
    ULONG       iKnot;

    for( pCell = pAllos->GetHeadCell(); pCell != NULL; pCell = pAllos->GetNextCell() )
    {
        pUnit->duration = pCell->m_ftDuration;
        for( iKnot = 0; iKnot < pUnit->nKnots; iKnot++ )
        {
            pUnit->pTime[iKnot] = pCell->m_ftTime[iKnot] * m_SampleRate;
            pUnit->pF0[iKnot]   = pCell->m_ftPitch[iKnot];
            pUnit->pAmp[iKnot]  = pUnit->ampRatio;
        }
        ++pUnit;
    }
} /* CFrontend::AlloToUnitPitch */
#endif
/*****************************************************************************
* CAlloList::DeleteTokenList *
*----------------------------*
* Description:
* Remove every item in link list.
*
********************************************************************** MC ***/
void CFrontend::DeleteTokenList()
{
SPDBG_FUNC( "CFrontend::DeleteTokenList" );
CFEToken *pTok;
while( !m_TokList.IsEmpty() )
{
pTok = (CFEToken*)m_TokList.RemoveHead();
delete pTok;
}
} /* CFrontend::DeleteTokenList */
/*****************************************************************************
* AdjustQuestTune *
*-----------------*
*   Description:
*   If the token ends a question (YN_QUEST_BOUNDARY or WH_QUEST_BOUNDARY),
*   rewrite its boundary type and boundary source to the yes/no form when
*   'fIsYesNo' is true, or to the WH form otherwise.
*   Tokens with any other boundary type are left alone.
*
********************************************************************** MC ***/
static void AdjustQuestTune( CFEToken *pTok, bool fIsYesNo )
{
    SPDBG_FUNC( "AdjustQuestTune" );

    //--- Nothing to do for tokens without a boundary
    if( pTok->m_TuneBoundaryType <= NULL_BOUNDARY )
    {
        return;
    }

    //--- Only question boundaries get adjusted
    if( (pTok->m_TuneBoundaryType != YN_QUEST_BOUNDARY) &&
        (pTok->m_TuneBoundaryType != WH_QUEST_BOUNDARY) )
    {
        return;
    }

    if( !fIsYesNo )
    {
        //--- WH question marker
        pTok->m_TuneBoundaryType = WH_QUEST_BOUNDARY;
        pTok->m_BoundarySource = BND_WHQuest;
    }
    else
    {
        //--- Final yes/no question marker
        pTok->m_TuneBoundaryType = YN_QUEST_BOUNDARY;
        pTok->m_BoundarySource = BND_YNQuest;
    }
} /* AdjustQuestTune */
//-----------------------------------------------------------------------
// Slots of the per-sentence 'contentPos' table used by
// CFrontend::ExclamEmph(). ExclamEmph() scans the table from index 0
// upward and takes the first filled slot, so an enumerator listed
// earlier here wins over one listed later.
// PRIORITY_SIZE is the number of slots, not a slot itself.
//-----------------------------------------------------------------------
typedef enum
{
p_Interj,
P_Adv,
P_Verb,
P_Adj,
P_Noun,
PRIORITY_SIZE,
} CONTENT_PRIORITY;
// Cast to SPLISTPOS by ExclamEmph() to mark a slot that has not been filled
#define NO_POSITION -1
/*****************************************************************************
* CFrontend::ExclamEmph *
*-----------------------*
* Description:
* Find a likely word to emph if sentence has exclamation
*
********************************************************************** MC ***/
void CFrontend::ExclamEmph()
{
SPDBG_FUNC( "CFrontend::ExclamEmph" );
CFEToken *pCur_Tok;
SPLISTPOS listPos, targetPos, curPos, contentPos[PRIORITY_SIZE];
long cContent, cWords;
long i;
for(i = 0; i < PRIORITY_SIZE; i++ )
{
contentPos[i] = (SPLISTPOS)NO_POSITION;
}
listPos = m_TokList.GetTailPosition();
pCur_Tok = m_TokList.GetNext( listPos );
//---------------------------------------------------
// First, check last token fors an exclamation
//---------------------------------------------------
if( pCur_Tok->m_TuneBoundaryType == EXCLAM_BOUNDARY )
{
//-----------------------------------------------------
// Then, see if there's only one content word
// in the sentence
//-----------------------------------------------------
cContent = cWords = 0;
listPos = m_TokList.GetHeadPosition();
while( listPos )
{
curPos = listPos;
pCur_Tok = m_TokList.GetNext( listPos );
if( pCur_Tok->m_posClass == POS_CONTENT )
{
cContent++;
cWords++;
if( cContent == 1)
{
targetPos = curPos;
}
//--------------------------------------------------------
// Fill the famous Azara Content Prominence Hierarchy (ACPH)
//--------------------------------------------------------
if( (pCur_Tok->POScode == MS_Noun) && (contentPos[P_Noun] == (SPLISTPOS)NO_POSITION) )
{
contentPos[P_Noun] = curPos;
}
else if( (pCur_Tok->POScode == MS_Verb) && (contentPos[P_Verb] == (SPLISTPOS)NO_POSITION) )
{
contentPos[P_Verb] = curPos;
}
else if( (pCur_Tok->POScode == MS_Adj) && (contentPos[P_Adj] == (SPLISTPOS)NO_POSITION) )
{
contentPos[P_Adj] = curPos;
}
else if( (pCur_Tok->POScode == MS_Adv) && (contentPos[P_Adv] == (SPLISTPOS)NO_POSITION) )
{
contentPos[P_Adv] = curPos;
}
else if( (pCur_Tok->POScode == MS_Interjection) && (contentPos[p_Interj] == (SPLISTPOS)NO_POSITION) )
{
contentPos[p_Interj] = curPos;
}
}
else if( pCur_Tok->m_posClass == POS_FUNC )
{
cWords++;
if( cWords == 1)
{
targetPos = curPos;
}
}
}
//--------------------------------------------
// If there's only one word or content word
// then EMPHASIZE it
//--------------------------------------------
if( (cContent == 1) || (cWords == 1) )
{
pCur_Tok = m_TokList.GetNext( targetPos );
pCur_Tok->user_Emph = 1;
}
else if( cContent > 1 )
{
for(i = 0; i < PRIORITY_SIZE; i++ )
{
if( contentPos[i] != (SPLISTPOS)NO_POSITION )
{
targetPos = contentPos[i];
break;
}
}
pCur_Tok = m_TokList.GetNext( targetPos );
pCur_Tok->user_Emph = 1;
}
}
} //ExclamEmph
/*****************************************************************************
* CFrontend::DoWordAccent *
*-------------------------*
* Description:
* Prepare word for emphasis
*
********************************************************************** MC ***/
void CFrontend::DoWordAccent()
{
SPDBG_FUNC( "CFrontend::DoWordAccent" );
long cNumOfWords;
long iCurWord;
CFEToken *pCur_Tok, *pNext_Tok, *pPrev_Tok, *pTempTok;
SPLISTPOS listPos;
TUNE_TYPE cur_Bnd, prev_Bnd;
//-----------------------------
// Initilize locals
//-----------------------------
cNumOfWords = m_TokList.GetCount();
if( cNumOfWords > 0 )
{
ExclamEmph();
prev_Bnd = PHRASE_BOUNDARY; // Assume start of sentence
//-------------------------------------
// Fill the token pipeline
//-------------------------------------
listPos = m_TokList.GetHeadPosition();
//-- Previous
pPrev_Tok = NULL;
//-- Current
pCur_Tok = m_TokList.GetNext( listPos );
//-- Next
if( listPos )
{
pNext_Tok = m_TokList.GetNext( listPos );
}
else
{
pNext_Tok = NULL;
}
//-----------------------------------
// Step through entire word array
// (skip last)
//-----------------------------------
for( iCurWord = 0; iCurWord < (cNumOfWords -1); iCurWord++ )
{
cur_Bnd = pCur_Tok->m_TuneBoundaryType;
if( pCur_Tok->user_Emph > 0 )
{
//-----------------------------------
// Current word is emphasized
//-----------------------------------
if( prev_Bnd == NULL_BOUNDARY )
{
pTempTok = new CFEToken;
if( pTempTok )
{
pTempTok->user_Break = EMPH_HESITATION;
pTempTok->m_TuneBoundaryType = NULL_BOUNDARY;
pTempTok->phon_Len = 1;
pTempTok->phon_Str[0] = _SIL_;
pTempTok->srcPosition = pCur_Tok->srcPosition;
pTempTok->srcLen = pCur_Tok->srcLen;
pTempTok->tokStr[0] = 0; // There's no orth for Break
pTempTok->tokLen = 0;
pTempTok->m_TermSil = 0;
pTempTok->m_SilenceSource = SIL_Emph;
pTempTok->m_DurScale = 0;
if( pPrev_Tok )
{
//pTempTok->m_DurScale = pPrev_Tok->m_DurScale;
pTempTok->m_ProsodyDurScale = pPrev_Tok->m_ProsodyDurScale;
pTempTok->user_Volume = pPrev_Tok->user_Volume;
}
else
{
//pTempTok->m_DurScale = 1.0f;
pTempTok->m_ProsodyDurScale = 1.0f;
}
m_TokList.InsertBefore( m_TokList.FindIndex( iCurWord ), pTempTok );
pCur_Tok = pTempTok;
m_cNumOfWords++;
cNumOfWords++;
iCurWord++;
}
}
}
//------------------------------
// Shift the token pipeline
//------------------------------
prev_Bnd = cur_Bnd;
pPrev_Tok = pCur_Tok;
pCur_Tok = pNext_Tok;
if( listPos )
{
pNext_Tok = m_TokList.GetNext( listPos );
}
else
{
pNext_Tok = NULL;
}
}
}
} /* CFrontend::DoWordAccent */
/*****************************************************************************
* CFrontend::DoPhrasing *
*-----------------------*
* Description:
* Insert sub-phrase boundaries into word token array.
*
* Walks every token of 'm_TokList' except the last, keeping a window of
* previous / current / next / next+2 / next+3 tokens. For each token a
* chain of part-of-speech rules may select a SUB_BOUNDARY_x value; when one
* is selected and accepted, a one-phoneme silence token carrying that
* boundary is inserted in front of the current token and the previous
* token is given a K_LHSTAR accent.
* Along the way the function tracks whether the current phrase looks like
* a yes/no question and hands that to AdjustQuestTune() for every token
* visited and, finally, for the tail token.
*
********************************************************************** MC ***/
void CFrontend::DoPhrasing()
{
SPDBG_FUNC( "CFrontend::DoPhrasing" );
long iCurWord;
CFEToken *pCur_Tok, *pNext_Tok, *pNext2_Tok, *pNext3_Tok, *pTempTok, *pPrev_Tok;
ENGPARTOFSPEECH cur_POS, next_POS, next2_POS, next3_POS, prev_POS;
bool fNext_IsPunct, fNext2_IsPunct, fNext3_IsPunct;
bool fIsYesNo, fMaybeWH, fHasDet, fInitial_Adv, fIsShortSent, fIsAlphaWH;
TUNE_TYPE cur_Bnd, prev_Punct;
long punctDistance;
long cNumOfWords;
SPLISTPOS listPos;
BOUNDARY_SOURCE bndNum;
ACCENT_SOURCE accNum;
//-----------------------------
// Initialize locals
//-----------------------------
cNumOfWords = m_TokList.GetCount();
if( cNumOfWords > 0 )
{
cur_Bnd = NULL_BOUNDARY;
prev_POS = MS_Unknown;
prev_Punct = PHRASE_BOUNDARY; // Assume start of sentence
punctDistance = 0; // To quiet the compiler...
fIsYesNo = fMaybeWH = fHasDet = fIsAlphaWH = false; // To quiet the compiler...
// (redundant: fMaybeWH was already cleared on the line above)
fMaybeWH = false;
fInitial_Adv = false;
// fIsShortSent is only read by a rule that is currently commented out
if (cNumOfWords <= 9)
{
fIsShortSent = true;
}
else
{
fIsShortSent = false;
}
//-------------------------------------
// Fill the token pipeline
//-------------------------------------
listPos = m_TokList.GetHeadPosition();
//-- Previous
pPrev_Tok = NULL;
//-- Current
pCur_Tok = m_TokList.GetNext( listPos );
//-- Next
if( listPos )
{
pNext_Tok = m_TokList.GetNext( listPos );
}
else
{
pNext_Tok = NULL;
}
//-- Next 2
if( listPos )
{
pNext2_Tok = m_TokList.GetNext( listPos );
}
else
{
pNext2_Tok = NULL;
}
//-- Next 3
if( listPos )
{
pNext3_Tok = m_TokList.GetNext( listPos );
}
else
{
pNext3_Tok = NULL;
}
//-----------------------------------
// Step through entire word array
// (skip last)
// iCurWord is the list index of pCur_Tok; both it and
// cNumOfWords are bumped whenever a token is inserted below.
//-----------------------------------
for( iCurWord = 0; iCurWord < (cNumOfWords -1); iCurWord++ )
{
bndNum = BND_NoSource;
accNum = ACC_NoSource;
// A boundary value strictly between NULL_BOUNDARY and SUB_BOUNDARY_1
// on the previous token starts a new phrase: reset the per-phrase state.
if( (prev_Punct > NULL_BOUNDARY) && (prev_Punct < SUB_BOUNDARY_1) )
{
punctDistance = 1;
fIsYesNo = true;
fMaybeWH = false;
fHasDet = false;
fIsAlphaWH = false;
}
else
{
punctDistance++;
}
//------------------------------------
// Process new word
//------------------------------------
cur_POS = pCur_Tok->POScode;
cur_Bnd = NULL_BOUNDARY;
//------------------------------------
// Don't depend on POS to detect
// "WH" question
//------------------------------------
if( ((pCur_Tok->tokStr[0] == 'W') || (pCur_Tok->tokStr[0] == 'w')) &&
((pCur_Tok->tokStr[1] == 'H') || (pCur_Tok->tokStr[1] == 'h')) )
{
fIsAlphaWH = true;
}
else
{
fIsAlphaWH = false;
}
//------------------------------------
// Look ahead to NEXT word
// (pNext_Tok is dereferenced without a NULL test; this relies on
// the loop never reaching the last token)
//------------------------------------
next_POS = pNext_Tok->POScode;
if( pNext_Tok->m_TuneBoundaryType != NULL_BOUNDARY )
{
fNext_IsPunct = true;
}
else
{
fNext_IsPunct = false;
}
//------------------------------------
// Look ahead 2 positions
//------------------------------------
if( pNext2_Tok )
{
next2_POS = pNext2_Tok->POScode;
if( pNext2_Tok->m_TuneBoundaryType != NULL_BOUNDARY )
{
fNext2_IsPunct = true;
}
else
{
fNext2_IsPunct = false;
}
}
else
{
next2_POS = MS_Unknown;
fNext2_IsPunct = false;
}
//------------------------------------
// Look ahead 3 positions
//------------------------------------
if( pNext3_Tok )
{
next3_POS = pNext3_Tok->POScode;
if( pNext3_Tok->m_TuneBoundaryType != NULL_BOUNDARY )
{
fNext3_IsPunct = true;
}
else
{
fNext3_IsPunct = false;
}
}
else
{
next3_POS = MS_Unknown;
fNext3_IsPunct = false;
}
//------------------------------------------------------------------------
// Is phrase a yes/no question?
// fIsYesNo starts out true for each phrase and is cleared when the first
// word (or the second word after a preposition/conjunction) is WH-like.
//------------------------------------------------------------------------
if( punctDistance == 1 )
{
if( (cur_POS == MS_Interr) || (fIsAlphaWH) )
{
//---------------------------------
// It's a "WH" question
//---------------------------------
fIsYesNo = false;
}
else if( (cur_POS == MS_Prep) || (cur_POS == MS_Conj) || (cur_POS == MS_CConj) )
{
fMaybeWH = true;
}
}
else if( (punctDistance == 2) && (fMaybeWH) &&
((cur_POS == MS_Interr) || (cur_POS == MS_RelPron) || (fIsAlphaWH)) )
{
fIsYesNo = false;
}
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_1: Insert boundary after sentence-initial adverb
//
// Reluctantly __the cat sat on the mat.
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// fInitial_Adv was set on the previous iteration (adverb followed by a
// determiner at phrase start), so the boundary lands before this word.
if( fInitial_Adv )
{
cur_Bnd = SUB_BOUNDARY_1;
fInitial_Adv = false;
bndNum = BND_PhraseRule1;
accNum = ACC_PhraseRule1;
}
else
{
if( (punctDistance == 1) &&
(cur_POS == MS_Adv) && (next_POS == MS_Det) )
// include
//LEX_SUBJPRON // he
//LEX_DPRON // this
//LEX_IPRON // everybody
//NOT LEX_PPRON // myself
{
fInitial_Adv = true;
}
else
{
fInitial_Adv = false;
}
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_2:Insert boundary before coordinating conjunctions
// The cat sat on the mat __and cleaned his fur.
//
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
if( (cur_POS == MS_CConj) &&
(fHasDet == false) &&
(punctDistance > 3) &&
(next2_POS != MS_Conj) )
{
cur_Bnd = SUB_BOUNDARY_2;
bndNum = BND_PhraseRule2;
accNum = ACC_PhraseRule2;
}
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_2:Insert boundary before adverb
// The cat sat on the mat __reluctantly.
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
else if( (cur_POS == MS_Adv) &&
(punctDistance > 4) &&
(next_POS != MS_Adj) )
{
cur_Bnd = SUB_BOUNDARY_2;
bndNum = BND_PhraseRule3;
accNum = ACC_PhraseRule3;
}
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_2:Insert boundary after object pronoun
// The cat sat with me__ on the mat.
//
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
else if( (prev_POS == MS_ObjPron) && (punctDistance > 2))
{
cur_Bnd = SUB_BOUNDARY_2;
bndNum = BND_PhraseRule4;
accNum = ACC_PhraseRule4;
}
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_2:Insert boundary before subject pronoun or contraction
// The cat sat on the mat _I see.
// The cat sat on the mat _I'm sure.
//
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
else if( ((cur_POS == MS_SubjPron) || (cur_POS == MS_Contr) ) &&
(punctDistance > 3) && (prev_POS != MS_RelPron) && (prev_POS != MS_Conj))
{
cur_Bnd = SUB_BOUNDARY_2;
bndNum = BND_PhraseRule5;
accNum = ACC_PhraseRule5;
}
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_2:Insert boundary before interr
// The cat sat on the mat _how odd.
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
else if( (cur_POS == MS_Interr) && (punctDistance > 4) )
{
cur_Bnd = SUB_BOUNDARY_2;
bndNum = BND_PhraseRule6;
accNum = ACC_PhraseRule6;
}
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_3:Insert boundary after subject noun phrase followed by aux verb
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_3:Insert boundary before vaux after noun phrase
// The gray cat __should sit on the mat.
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// (the 'prev_POS != MS_VAux' term below is already implied by the
// Noun/Verb test next to it)
else if( (punctDistance > 2) &&
( ((prev_POS == MS_Noun) || (prev_POS == MS_Verb)) && (prev_POS != MS_VAux) ) &&
(cur_POS == MS_VAux)
)
{
cur_Bnd = SUB_BOUNDARY_3;
bndNum = BND_PhraseRule7;
accNum = ACC_PhraseRule7;
}
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_3:Insert boundary after MS_Interr
// The gray cat __should sit on the mat.
// SEE ABOVE???
// (rule 8 is disabled)
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
/*else if( (prev_POS == MS_Noun) && ((next_POS != MS_RelPron) &&
(next_POS != MS_VAux) && (next_POS != MS_RVAux) &&
(next2_POS != MS_VAux) && (next2_POS != MS_RVAux)) &&
(punctDistance > 4) &&
((cur_POS == MS_VAux) || (cur_POS == MS_RVAux)))
{
cur_Bnd = SUB_BOUNDARY_3;
bndNum = BND_PhraseRule8;
accNum = ACC_PhraseRule8;
}*/
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_3:Insert boundary after MS_Interr
// The cat sat on the mat _how odd.
// (as coded: boundary before a verb that follows a noun)
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
else if( (prev_POS == MS_Noun) && (next_POS != MS_RelPron) &&
(next_POS != MS_Conj) &&
(next_POS != MS_CConj) && (punctDistance > 3) && (cur_POS == MS_Verb))
{
cur_Bnd = SUB_BOUNDARY_3;
bndNum = BND_PhraseRule9;
accNum = ACC_PhraseRule9;
}
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_3:Insert boundary after MS_Interr
// The cat sat on the mat _how odd.
// (rules 10, 11 and 12 below are disabled as one commented-out region)
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
/*else if( (prev_POS == MS_Noun) && (cur_POS != MS_RelPron) &&
(cur_POS != MS_RVAux) && (cur_POS != MS_CConj) &&
(cur_POS != MS_Conj) && (punctDistance > 2) &&
((punctDistance > 2) || (fIsShortSent)) && (cur_POS == MS_Verb))
{
cur_Bnd = SUB_BOUNDARY_3;
bndNum = BND_PhraseRule10;
accNum = ACC_PhraseRule10;
}
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_4:Insert boundary before conjunction
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
else if( ((cur_POS == MS_Conj) && (punctDistance > 3) &&
(fNext_IsPunct == false) &&
(prev_POS != MS_Conj) && (prev_POS != MS_CConj) &&
(fNext2_IsPunct == false)) ||
( (prev_POS == MS_VPart) && (cur_POS != MS_Prep) &&
(cur_POS != MS_Det) &&
(punctDistance > 2) &&
((cur_POS == MS_Noun) || (cur_POS == MS_Noun) || (cur_POS == MS_Adj))) ||
( (cur_POS == MS_Interr) && (punctDistance > 2) &&
(cur_POS == MS_SubjPron)) )
{
cur_Bnd = SUB_BOUNDARY_4;
bndNum = BND_PhraseRule11;
accNum = ACC_PhraseRule11;
}
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_5:Insert boundary before relative pronoun
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
else if( ( (cur_POS == MS_RelPron) && (punctDistance >= 3) &&
(prev_POS != MS_Prep) && (next3_POS != MS_VAux) &&
(next3_POS != MS_RVAux) &&
( (prev_POS == MS_Noun) || (prev_POS == MS_Verb) ) ) ||
( (cur_POS == MS_Quant) && (punctDistance > 5) &&
(prev_POS != MS_Adj) && (prev_POS != MS_Det) &&
(prev_POS != MS_VAux) && (prev_POS != MS_RVAux) &&
(prev_POS != MS_Det) && (next2_POS != MS_CConj) &&
(fNext_IsPunct == false)))
{
cur_Bnd = SUB_BOUNDARY_5;
bndNum = BND_PhraseRule12;
accNum = ACC_PhraseRule12;
}*/
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
// SUB_BOUNDARY_6:Silverman87-style, content/function tone group boundaries.
// Does trivial sentence-final function word look-ahead check.
//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
else if( ( (prev_POS == MS_Noun) || (prev_POS == MS_Verb) || (prev_POS == MS_Adj) || (prev_POS == MS_Adv))
&& ((cur_POS != MS_Noun) && (cur_POS != MS_Verb) && (cur_POS != MS_Adj) && (cur_POS != MS_Adv))
&& (fNext_IsPunct == false))
{
cur_Bnd = SUB_BOUNDARY_6;
bndNum = BND_PhraseRule13;
accNum = ACC_PhraseRule13;
}
}
//------------------------------------------------------------------------
// If phrasing was found, save it
// Accepted only when this is not the first token, the previous token's
// boundary value tests false (zero), and the current token has no
// boundary of its own.
//------------------------------------------------------------------------
if( (cur_Bnd != NULL_BOUNDARY) && (iCurWord > 0) &&
//!(fNext_IsPunct) &&
!(prev_Punct) &&
(pCur_Tok->m_TuneBoundaryType == NULL_BOUNDARY) )
{
//pCur_Tok->m_TuneBoundaryType = cur_Bnd;
pTempTok = new CFEToken;
if( pTempTok )
{
pTempTok->m_TuneBoundaryType = cur_Bnd;
pTempTok->phon_Len = 1;
pTempTok->phon_Str[0] = _SIL_;
pTempTok->srcPosition = pCur_Tok->srcPosition;
pTempTok->srcLen = pCur_Tok->srcLen;
pTempTok->tokStr[0] = '+'; // punctuation
pTempTok->tokStr[1] = 0; // delimiter
pTempTok->tokLen = 1;
pTempTok->m_TermSil = 0;
pTempTok->m_DurScale = 0;
// The word in front of the new boundary gets the accent and is tagged
// with the rule that produced it.
if( pPrev_Tok )
{
pPrev_Tok->m_AccentSource = accNum;
pPrev_Tok->m_BoundarySource = bndNum;
pPrev_Tok->m_Accent = K_LHSTAR;
}
pTempTok->m_SilenceSource = SIL_SubBound;
if( pPrev_Tok )
{
//pTempTok->m_DurScale = pPrev_Tok->m_DurScale;
pTempTok->m_ProsodyDurScale = pPrev_Tok->m_ProsodyDurScale;
pTempTok->user_Volume = pPrev_Tok->user_Volume;
}
else
{
//pTempTok->m_DurScale = 1.0f;
pTempTok->m_ProsodyDurScale = 1.0f;
}
// Insert in front of the current word. From here on pCur_Tok is the
// boundary token, so it becomes "previous" for the next iteration;
// the extra iCurWord++ keeps the index aligned with the list.
m_TokList.InsertBefore( m_TokList.FindIndex( iCurWord ), pTempTok );
pCur_Tok = pTempTok;
m_cNumOfWords++;
cNumOfWords++;
iCurWord++;
}
}
//-------------------------------
// Process sentence punctuation
//-------------------------------
AdjustQuestTune( pCur_Tok, fIsYesNo );
//-------------------------------
// Prepare for next word
//-------------------------------
prev_Punct = pCur_Tok->m_TuneBoundaryType;
prev_POS = cur_POS;
pPrev_Tok = pCur_Tok;
//------------------------------
// Shift the token pipeline
//------------------------------
pCur_Tok = pNext_Tok;
pNext_Tok = pNext2_Tok;
pNext2_Tok = pNext3_Tok;
if( listPos )
{
pNext3_Tok = m_TokList.GetNext( listPos );
}
else
{
pNext3_Tok = NULL;
}
//------------------------------------------------------------------------
// Keep track of when determiners encountered to help in deciding
// when to allow a strong 'and' boundary (SUB_BOUNDARY_2)
//------------------------------------------------------------------------
if( punctDistance > 2)
{
fHasDet = false;
}
if( cur_POS == MS_Det )
{
fHasDet = true;
}
}
//-------------------------------------
// Process final sentence punctuation
//-------------------------------------
pCur_Tok = (CFEToken*)m_TokList.GetTail();
AdjustQuestTune( pCur_Tok, fIsYesNo );
}
} /* CFrontend::DoPhrasing */
/*****************************************************************************
* CFrontend::RecalcProsody *
*--------------------------*
*   Description:
*   In response to a real-time rate change, recalculate duration and pitch
*   for the current allo list and push the new values into the unit array.
*   Does nothing when there is no current allo list or unit array.
*
********************************************************************** MC ***/
#ifdef USE_VOICEDATAOBJ
void CFrontend::RecalcProsody()
{
    SPDBG_FUNC( "CFrontend::RecalcProsody" );

    //--------------------------------------------
    // Nothing to recalculate before a sentence
    // has been parsed (or after a failed parse)
    //--------------------------------------------
    if( (m_pAllos == NULL) || (m_pUnits == NULL) )
    {
        return;
    }

    //--------------------------------------------
    // Compute new allo durations
    //--------------------------------------------
    m_DurObj.AlloDuration( m_pAllos, m_RateRatio_API );

    //--------------------------------------------
    // Modulate allo pitch
    //--------------------------------------------
    m_PitchObj.AlloPitch( m_pAllos, m_BasePitch, m_PitchRange );

    //--------------------------------------------
    // Copy durations and pitch knots to the units.
    // This is exactly what AlloToUnitPitch() does,
    // so reuse it instead of repeating the loop.
    //--------------------------------------------
    AlloToUnitPitch( m_pAllos, m_pUnits );
} /* CFrontend::RecalcProsody */
#endif
/*****************************************************************************
* CFrontend::NextData *
*---------------------*
*   Description:
*   This gets called from the backend when it needs more data.
*   Services pending SAPI actions (rate change, abort, sentence skip), then
*   parses the next sentence and hands it back through '*pData' as a newly
*   allocated SentenceData holding one Phone entry per allo.
*
*   pData        - receives the SentenceData pointer; left untouched when
*                  no sentence is produced (speech done or failure).
*   pSpeechState - always receives the current 'm_SpeechState'.
*
*   Returns the HRESULT of the last failing step, E_OUTOFMEMORY when the
*   sentence buffers cannot be allocated, otherwise a success code.
*
********************************************************************** MC ***/
HRESULT CFrontend::NextData( void **pData, SPEECH_STATE *pSpeechState )
{
    SPDBG_FUNC( "CFrontend::NextData" );
    HRESULT     hr = S_OK;

    //-----------------------------------
    // First, check and see if SAPI has an action
    //-----------------------------------
    // Check for rate change
    if( m_pOutputSite->GetActions() & SPVES_RATE )
    {
        long    baseRateRatio;
        hr = m_pOutputSite->GetRate( &baseRateRatio );
        if ( SUCCEEDED( hr ) )
        {
            //--- Clip to the engine's range. The upper bound must be the
            //--- rate limit; it used to be tested against SPMAX_VOLUME,
            //--- which let rates above MAX_USER_RATE through unclipped.
            if( baseRateRatio > MAX_USER_RATE )
            {
                baseRateRatio = MAX_USER_RATE;
            }
            else if ( baseRateRatio < MIN_USER_RATE )
            {
                baseRateRatio = MIN_USER_RATE;
            }
            m_RateRatio_API = CntrlToRatio( baseRateRatio );
        }
    }

    //---------------------------------------------
    // Async stop?
    //---------------------------------------------
    if( SUCCEEDED( hr ) && ( m_pOutputSite->GetActions() & SPVES_ABORT ) )
    {
        m_SpeechState = SPEECH_DONE;
    }

    //---------------------------------------------
    // Async skip?
    //---------------------------------------------
    if( SUCCEEDED( hr ) && ( m_pOutputSite->GetActions() & SPVES_SKIP ) )
    {
        SPVSKIPTYPE SkipType;
        long        SkipCount = 0;

        hr = m_pOutputSite->GetSkipInfo( &SkipType, &SkipCount );
        if ( SUCCEEDED( hr ) && SkipType == SPVST_SENTENCE )
        {
            IEnumSENTITEM *pGarbage;

            //--- Skip Forwards
            if ( SkipCount > 0 )
            {
                long OriginalSkipCount = SkipCount;

                //--- Discard all but the last sentence to be skipped
                while ( SkipCount > 1 &&
                        ( hr = m_pEnumSent->Next( &pGarbage ) ) == S_OK )
                {
                    SkipCount--;
                    pGarbage->Release();
                }
                if ( hr == S_OK )
                {
                    hr = ParseSentence( eNEXT );
                    if ( SUCCEEDED( hr ) )
                    {
                        SkipCount--;
                    }
                }
                else if ( hr == S_FALSE )
                {
                    m_SpeechState = SPEECH_DONE;
                }
                //--- Number of sentences actually skipped
                SkipCount = OriginalSkipCount - SkipCount;
            }
            //--- Skip Backwards
            else if ( SkipCount < 0 )
            {
                long OriginalSkipCount = SkipCount;

                while ( SkipCount < -1 &&
                        ( hr = m_pEnumSent->Previous( &pGarbage ) ) == S_OK )
                {
                    SkipCount++;
                    pGarbage->Release();
                }
                if ( hr == S_OK )
                {
                    hr = ParseSentence( ePREVIOUS );
                    // This case is different from the forward skip, needs to test that
                    // Parse sentence found something to parse!
                    if ( SUCCEEDED( hr ) && m_SpeechState != SPEECH_DONE )
                    {
                        SkipCount++;
                    }
                }
                else if ( hr == S_FALSE )
                {
                    m_SpeechState = SPEECH_DONE;
                }
                //--- Number of sentences actually skipped (negative)
                SkipCount = OriginalSkipCount - SkipCount;
            }
            //--- Skip to beginning of this sentence
            else
            {
                m_CurUnitIndex = 0;
            }
            hr = m_pOutputSite->CompleteSkip( SkipCount );
        }
    }

    //---------------------------------------------
    // Make sure we're still speaking
    //---------------------------------------------
    if( SUCCEEDED( hr ) && m_SpeechState != SPEECH_DONE )
    {
        //-----------------------------------------------------------
        // A whole sentence is delivered per call. (An earlier
        // unit-at-a-time path, which also called RecalcProsody() on
        // a rate change, is no longer used here.)
        // NOTE(review): when a sentence skip above has already called
        // ParseSentence(), this call parses again - confirm that the
        // sentence parsed during the skip is meant to be replaced.
        //-----------------------------------------------------------
        hr = ParseSentence( eNEXT );
        if ( SUCCEEDED( hr ) && m_SpeechState == SPEECH_CONTINUE )
        {
            const int       cPhones = (int) m_pAllos->GetCount();
            Phone           *pPhones = new Phone[ cPhones ];
            SentenceData    *pSentData = NULL;

            if ( pPhones )
            {
                pSentData = new SentenceData;
            }
            if ( pSentData == NULL )
            {
                //--- Out of memory: hand nothing back
                delete [] pPhones;
                hr = E_OUTOFMEMORY;
            }
            else
            {
                ZeroMemory( pPhones, cPhones * sizeof( Phone ) );
                pSentData->pPhones      = pPhones;
                pSentData->ulNumPhones  = cPhones;
                m_PitchObj.GetContour( &pSentData->pf0, &pSentData->ulNumf0 );

                float RunTime       = 0.0;      // time of speech so far, initial silence excluded
                float InitialSil    = 0.0;      // accumulated leading silence
                bool  fInitialSil   = true;     // still inside the leading silence?

                //--- Pick the phone-name table for this voice
                typedef const char *(*MapPhoneSetFunc) (ALLO_CODE);
                MapPhoneSetFunc MapPhoneSet;
                if ( m_fNewPhoneSet )
                {
                    MapPhoneSet = NewMapPhoneSet;
                }
                else
                {
                    MapPhoneSet = OldMapPhoneSet;
                }

                for ( int i = 0; i < cPhones; i++ )
                {
                    CAlloCell   *pCurCell = m_pAllos->GetCell( i );
                    Phone       &phone = pPhones[i];

                    strcpy( phone.phone, MapPhoneSet( pCurCell->m_allo ) );
                    const bool fIsSil = ( stricmp( phone.phone, "sil" ) == 0 );

                    if ( fInitialSil && fIsSil )
                    {
                        //--- Leading SIL: no pitch, not counted in RunTime
                        InitialSil += pCurCell->m_ftDuration;
                        phone.f0    = 0;
                        phone.end   = InitialSil;
                    }
                    else if ( ( i == cPhones - 1 ) && fIsSil )
                    {
                        //--- Final SIL: ends where the speech ended
                        //--- (f0 stays at the zero from ZeroMemory)
                        phone.end = RunTime + InitialSil;
                    }
                    else
                    {
                        fInitialSil = false;
                        phone.f0 = GetPhoneF0( pSentData->pf0, RunTime, pCurCell->m_ftDuration );
                        RunTime += pCurCell->m_ftDuration;
                        phone.end = RunTime + InitialSil;
                    }
                }
                *pData = (void*) pSentData;
            }
        }
    }

    //-------------------------------------------
    // Let client know if text input is dry
    //-------------------------------------------
    *pSpeechState = m_SpeechState;
    return hr;
} /* CFrontend::NextData */
/*****************************************************************************
* IsVowel *
*----------*
*   Description:
*   Returns true when the phone name starts with one of the lower-case
*   letters 'a', 'e', 'i', 'o' or 'u'. A NULL pointer or an empty string
*   yields false.
*
*********************************************************************** WD ***/
bool IsVowel ( char* ph )
{
    if ( !ph )
    {
        return false;
    }
    switch ( ph[0] )
    {
    case 'a':
    case 'e':
    case 'i':
    case 'o':
    case 'u':
        return true;
    default:
        return false;
    }
}