/******************************************************************************
* SpeakerData.cpp *
*-----------------*
*
*------------------------------------------------------------------------------
*  Copyright (c) 1997  Entropic Research Laboratory, Inc.
*  Copyright (C) 1998  Entropic, Inc
*  Copyright (C) 2000 Microsoft Corporation         Date: 03/02/00
*  All Rights Reserved
*
********************************************************************* PACOG ***/

#include "SpeakerData.h"
#include "UnitSearch.h"
#include "clusters.h"
#include "trees.h"
#include "VqTable.h"
#include "BeVersion.h"
#include "vapiio.h"
#include "SynthUnit.h"
#include "BackEnd.h"
#include // NOTE(review): the <...> header name was lost in extraction - restore from version control
#ifdef WIN32
#include // NOTE(review): the <...> header name was lost in extraction - restore from version control
#endif

// Size of the buffer used to read section tags from the speaker file.
#define MAX_LINE 512

// Samples and epochs of one unit as held by CSpeakerData::m_pUnits.
struct UnitSamples
{
    char*  samples;   // Raw sample bytes, addressed in units of VapiIO::SizeOf(m_iFormat)
    int    nSamples;  // Number of samples in 'samples'
    Epoch* epochs;    // Epoch marks for this unit
    int    nEpochs;   // Number of entries in 'epochs'
};

// NOTE(review): the template arguments of CList were lost in extraction -
// restore from version control.
CList CSpeakerData::m_speakers;
CComAutoCriticalSection CSpeakerData::m_critSect;

/*****************************************************************************
* CSpeakerData::ClassFactory *
*----------------------------*
*   Description:
*       Returns the CSpeakerData registered in m_speakers under pszFileName,
*   adding a reference to it. If none is registered, a new object is created,
*   registered, referenced and loaded.
*       Returns 0 if the object cannot be created or Load() fails.
*       The whole operation runs under m_critSect.
******************************************************************* PACOG ***/
CSpeakerData* CSpeakerData::ClassFactory (const char* pszFileName, bool fCheckVersion)
{
    CSpeakerData* pSpkrData = 0;

    assert (pszFileName);

    m_critSect.Lock();

    if ((m_speakers.Find(pszFileName, pSpkrData)) != 0)
    {
        // Already loaded: share the existing instance.
        pSpkrData->AddRef();
    }
    else if (( pSpkrData = new CSpeakerData(pszFileName) ) != 0)
    {
        m_speakers.PushBack(pszFileName, pSpkrData);
        pSpkrData->AddRef();

        if (!pSpkrData->Load (fCheckVersion) )
        {
            // NOTE(review): the object is deleted while still registered;
            // presumably the destructor removes it from m_speakers, but the
            // destructor body is not visible here - confirm.
            delete pSpkrData;
            pSpkrData = 0;
        }
    }

    m_critSect.Unlock();

    return pSpkrData;
}

/*****************************************************************************
* CSpeakerData::AddRef *
*----------------------*
*   Description:
*       Increments the reference count. Not synchronized by itself;
*   ClassFactory calls it while holding m_critSect.
******************************************************************* PACOG ***/
void CSpeakerData::AddRef()
{
    m_iRefCount++;
}

/*****************************************************************************
* CSpeakerData::Release *
*-----------------------*
*   Description:
*       Decrements the reference count and destroys the object when it
*   reaches zero.
*       The decrement and the delete run under m_critSect, the same lock
*   ClassFactory holds while it looks the object up and calls AddRef().
*   Without it, a concurrent ClassFactory call could find and return an
*   object that this thread is about to destroy.
******************************************************************* PACOG ***/
void CSpeakerData::Release()
{
    // m_critSect is static, so it is still valid after 'delete this'.
    // The destructor takes the same lock again on this thread, exactly as it
    // already does when ClassFactory deletes a speaker that failed to load.
    m_critSect.Lock();

    if (--m_iRefCount == 0)
    {
        delete this;
    }

    m_critSect.Unlock();
}

/*****************************************************************************
* CSpeakerData::CSpeakerData *
*----------------------------*
*   Description:
*       Stores the file name and resets every other member to zero/false.
*   No file access happens here.
******************************************************************* PACOG ***/
CSpeakerData::CSpeakerData (const char* pszFileName)
{
    // NOTE(review): unbounded copy; the size of m_pszFileName is declared in
    // SpeakerData.h and is not visible here - confirm it fits any path.
    strcpy(m_pszFileName, pszFileName);

    m_iRefCount = 0;
    m_iSampFreq = 0;
    m_iFormat   = 0;

    m_pTrees    = 0;
    m_pClusters = 0;
    m_pVq       = 0;

    m_ttpPar.baseLine = 0;
    m_ttpPar.refLine  = 0;
    m_ttpPar.topLine  = 0;

    m_pFileNames    = 0;
    m_iNumFileNames = 0;
    m_pUnits        = 0;
    m_iNumUnits= 0;

    m_dRumTime    = 0;
    m_fWaveConcat = false;
    m_fMSPhoneSet = false;
    m_fMSEntropic = false;
}

/*****************************************************************************
* CSpeakerData::~CSpeakerData *
*-----------------------------*
*   Description:
*       Runs under m_critSect.
*
*   NOTE(review): everything between the loop index "i" below and the
*   "LoadFromFile (fin)" call was lost in extraction. That covers the rest of
*   this destructor and the beginning of the function that parses the
*   speaker file. The surviving tokens are kept exactly as found; restore the
*   missing text from version control.
******************************************************************* PACOG ***/
CSpeakerData::~CSpeakerData ()
{
    m_critSect.Lock();

    for (int i = 0; iLoadFromFile (fin))
    {
        goto end;
    }

    if ((m_pClusters = new CClusters) == 0)
    {
        goto end;
    }

    if (!m_pClusters->LoadFromFile(fin))
    {
        goto end;
    }

    // Each remaining line must be one of the section tags below; an
    // unrecognized line, or a section loader that fails, aborts the load.
    while (fgets (line, MAX_LINE, fin))
    {
        // Gain table, vq table and ttp params are optional
        if (strcmp(line, "#Slm Weights\n") == 0)
        {
            if (!LoadWeights (fin))
            {
                goto end;
            }
        }
        else if (strcmp(line, "#Slm New Weights\n") == 0)
        {
            if (!LoadNewWeights (fin))
            {
                goto end;
            }
        }
        else if ( strcmp(line, "#Waveform Concatenation\n") == 0)
        {
            m_fWaveConcat = true;
        }
        else if ( strcmp(line, "#MS Phone Set\n") == 0)
        {
            m_fMSPhoneSet = true;
        }
        else if ( strcmp(line, "#Slm AvgGain Table\n") == 0)
        {
            if (!m_pClusters->LoadGainTable (fin))
            {
                goto end;
            }
        }
        else if ( strcmp(line, "#Slm VQ Table\n") == 0)
        {
            if ((m_pVq = new CVqTable) == 0)
            {
                goto end;
            }
            if (!m_pVq->LoadFromFile(fin))
            {
                goto end;
            }
        }
        else if ( strcmp(line, "#Ttp Params\n") == 0)
        {
            if (!LoadTtpParam (fin))
            {
                goto end;
            }
        }
        else if ( strcmp(line, "#Samples\n") == 0)
        {
            if (!LoadSamples (fin))
            {
                goto end;
            }
        }
        else if ( strcmp(line, "#File names\n") == 0)
        {
            if (!LoadFileNames (fin))
            {
                goto end;
            }
        }
        else
        {
            goto end;
        }
    }

    return 1;

end:
    return 0;
}

/*****************************************************************************
* CSpeakerData::LoadFileNames *
*-----------------------------*
*   Description:
*       Reads the number of file names from fp and allocates m_pFileNames.
*   Returns 0 if the count cannot be read or the allocation fails.
*
*   NOTE(review): everything between the loop index "i" below and the
*   "PreComputeDist(" call was lost in extraction. That covers the rest of
*   this function and the beginning of the function that applies m_weights to
*   the clusters and the VQ table. The surviving tokens are kept exactly as
*   found; restore the missing text from version control.
******************************************************************* PACOG ***/
int CSpeakerData::LoadFileNames (FILE* fp)
{
    char fileName[_MAX_PATH+1];

    assert (fp);

    if (fscanf (fp, "%d\n", &m_iNumFileNames)!=1)
    {
        return 0;
    }

    if ((m_pFileNames = new CDynString[m_iNumFileNames]) == 0)
    {
        return 0;
    }

    for (int i=0; iPreComputeDist(m_weights.dur, m_weights.rms, m_weights.lkl);
    }

    if (m_pVq)
    {
        m_pVq->Scale (m_weights.cont);
    }

    m_critSect.Unlock();
}

/*****************************************************************************
* CSpeakerData::GetTtpParam *
*---------------------------*
*   Description:
*       Copies the three m_ttpPar values (baseLine, refLine, topLine) into
*   the caller's variables. All three pointers must be non-null.
******************************************************************* PACOG ***/
void CSpeakerData::GetTtpParam (int* piBaseLine, int* piRefLine, int* piTopLine)
{
    assert (piBaseLine && piRefLine && piTopLine);

    *piBaseLine = m_ttpPar.baseLine;
    *piRefLine  = m_ttpPar.refLine;
    *piTopLine  = m_ttpPar.topLine;
}

/*****************************************************************************
* CSpeakerData::LoadSamples *
*---------------------------*
*   Description:
*       Reads, as raw binary values, the unit count, the sample format and
*   the sampling frequency from fin, then allocates m_pUnits.
*   Returns 0 if a read or the allocation fails.
*
*   NOTE(review): the text following the loop header below was lost in
*   extraction (the per-unit loading loop and whatever follows it). The
*   surviving tokens are kept exactly as found; restore the missing text from
*   version control.
******************************************************************* PACOG ***/
int CSpeakerData::LoadSamples (FILE* fin)
{
    int sampSize;

    assert (fin);

    if (!fread (&m_iNumUnits, sizeof(m_iNumUnits), 1, fin))
    {
        return 0;
    }

    if (!fread (&m_iFormat, sizeof(m_iFormat), 1, fin))
    {
        return 0;
    }
    sampSize = VapiIO::SizeOf (m_iFormat);

    if (!fread (&m_iSampFreq, sizeof(m_iSampFreq), 1, fin))
    {
        return 0;
    }

    if ((m_pUnits = new UnitSamples[m_iNumUnits]) == 0)
    {
        return 0;
    }

    for (int i=0;
iisFileName) { pSynth = UnitFromFile (pDescript); } else { pSynth = UnitFromMemory (pDescript); } return pSynth; } /***************************************************************************** * CSpeakerData::UnitFromFile * *----------------------------* * Description: * ******************************************************************* PACOG ***/ CSynth* CSpeakerData::UnitFromFile (ChkDescript* chunk) { CSynth* pUnit; short *psSamples = NULL; int iNumSamples; Epoch* pEpochs; int iNumEpochs; int iSampFreq; int i; assert (chunk); if ( !ReadSamples(chunk->chunk.fileName, chunk->from, chunk->to, &psSamples, &iNumSamples, &pEpochs, &iNumEpochs, &iSampFreq) ) { fprintf (stderr, "Error accessing file %s\n", chunk->chunk.fileName); } if ((pUnit = new CSynth(iSampFreq)) == 0) { return 0; } if ((pUnit->m_pdSamples = new double[pUnit->m_iNumSamples]) == 0) { return 0; } for (i=0; im_iNumSamples; i++) { pUnit->m_pdSamples[i] = psSamples[i]; } delete []psSamples; pUnit->m_iNumSamples = iNumSamples; pUnit->m_pEpochs = pEpochs; pUnit->m_iNumEpochs = iNumEpochs; //Override sampling frequency and sample format // m_iSampFreq = iSampFreq; // m_iSampFormat = VAPI_PCM16; strcpy ( pUnit->m_pszChunkName, chunk->name ); pUnit->m_dF0Ratio = chunk->f0Ratio; pUnit->m_dGain = chunk->gain; /* --- use unit duration as target duration if ( m_fWaveConcat ) { m_dRumTime += pEpochs[ iNumEpochs - 1 ].time; pUnit->m_dRunTimeLimit = m_dRumTime; } else { pUnit->m_dRunTimeLimit = chunk->end; } */ pUnit->m_dRunTimeLimit = chunk->end; return pUnit; } /***************************************************************************** * CSpeakerData::UnitFromMemory * *------------------------------* * Description: * ******************************************************************* PACOG ***/ CSynth* CSpeakerData::UnitFromMemory (ChkDescript* pChunk) { CSynth* pUnit; UnitSamples* pUSamples; int iFirstEpoch; int iLastEpoch; int firstSamp; int lastSamp; double from; double to; int i; assert (pChunk); if 
((pUnit = new CSynth(m_iSampFreq)) == 0) { return 0; } if (!m_pUnits || pChunk->chunk.chunkIdx<0 || pChunk->chunk.chunkIdx>=m_iNumUnits) { return 0; } pUSamples = m_pUnits + pChunk->chunk.chunkIdx; strcpy ( pUnit->m_pszChunkName, pChunk->name ); from = pChunk->from; to = pChunk->to; do { for (iFirstEpoch=0; iFirstEpochnEpochs && from > pUSamples->epochs[iFirstEpoch].time; iFirstEpoch++) { //Empty block } if (iFirstEpoch && from < pUSamples->epochs[iFirstEpoch].time) { iFirstEpoch--; } for (iLastEpoch = iFirstEpoch; iLastEpoch<(pUSamples->nEpochs-1) && to > pUSamples->epochs[iLastEpoch].time; iLastEpoch++) { //Empty block } pUnit->m_iNumEpochs = iLastEpoch - iFirstEpoch +1; from-= 0.005; to += 0.005; } while (pUnit->m_iNumEpochs < 3 && (iFirstEpoch >0 || iLastEpoch < (pUSamples->nEpochs -1))); if ((pUnit->m_pEpochs = new Epoch[pUnit->m_iNumEpochs]) == NULL) { return 0; } memcpy(pUnit->m_pEpochs, pUSamples->epochs + iFirstEpoch, pUnit->m_iNumEpochs * sizeof(*pUnit->m_pEpochs)); firstSamp = (int)(pUnit->m_pEpochs[0].time * m_iSampFreq); lastSamp = (int)(pUnit->m_pEpochs[pUnit->m_iNumEpochs-1].time * m_iSampFreq + 0.5); if (lastSamp >= pUSamples->nSamples) { lastSamp = pUSamples->nSamples -1; } pUnit->m_iNumSamples = lastSamp - firstSamp +1; if ((pUnit->m_pdSamples = new double[pUnit->m_iNumSamples]) == 0) { return 0; } { char* pcInPtr; short* pnBuffer; // pUSamples->samples is a char*, add correct number of bytes. 
pcInPtr = pUSamples->samples + (firstSamp * VapiIO::SizeOf(m_iFormat)); if (m_iFormat != VAPI_PCM16) { if ((pnBuffer = new short[pUnit->m_iNumSamples]) == 0) { return 0; } VapiIO::DataFormatConversion (pcInPtr, m_iFormat, (char*)pnBuffer, VAPI_PCM16, pUnit->m_iNumSamples); } else { pnBuffer = (short*)pcInPtr; } for (i=0; i< pUnit->m_iNumSamples; i++) { pUnit->m_pdSamples[i] = (double)pnBuffer[i]; } if (pnBuffer != (short*)pcInPtr) { delete[] pnBuffer; } } for (i=0; im_iNumEpochs; i++) { pUnit->m_pEpochs[i].time -= (firstSamp /(double)m_iSampFreq); } pUnit->m_dF0Ratio = pChunk->f0Ratio; pUnit->m_dGain = pChunk->gain; /* --- use unit duration as target duration if ( m_fWaveConcat ) { m_dRumTime += pUnit->m_pEpochs[pUnit->m_iNumEpochs-1].time; pUnit->m_dRunTimeLimit = m_dRumTime; } else { pUnit->m_dRunTimeLimit = pChunk->end; } */ pUnit->m_dRunTimeLimit = pChunk->end; return pUnit; } /***************************************************************************** * CSpeakerData::ReadSamples * *---------------------------* * Description: * Given some basic information about unit required, it reads the unit in * and gets the epochs and the samples ******************************************************************* PACOG ***/ int CSpeakerData::ReadSamples (const char* pszPathName, // Base file with directory path double dFrom, // Starting time of unit double dTo, // Ending time of unit short** ppnSamples, // Read samples int* piNumSamples, // Number of samples read Epoch** ppEpochs, // Array of Epochs int* piNumEpochs, // Number of Epochs read int* piSampFreq) // Sampling Frequence { VapiIO* pViof = 0; int iRetVal = VAPI_IOERR_NOERROR; int iSampFormat; Epoch* pEpochBuffer = NULL; int iNumReadEpochs; int iFirstEpoch; int iLastEpoch; double dStartTime; char* pcBuffer = 0; int i; assert (pszPathName); assert (dFrom>=0.0); assert (dTo == -1.0 || dTo >= dFrom); assert (ppnSamples); assert (piNumSamples); assert (ppEpochs); assert (piNumEpochs); assert (piSampFreq); if (( pViof 
= VapiIO::ClassFactory()) == 0) { iRetVal = VAPI_IOERR_MEMORY; goto error; } if ( (iRetVal = pViof->OpenFile (pszPathName, VAPI_IO_READ)) != VAPI_IOERR_NOERROR) { goto error; } if ( (iRetVal = pViof->Format (piSampFreq, &iSampFormat)) != VAPI_IOERR_NOERROR) { goto error; } // Read epochs first if ((iRetVal = pViof->ReadEpochs(&pEpochBuffer, &iNumReadEpochs)) != VAPI_IOERR_NOERROR) { goto error; } do { for (iFirstEpoch = 0; iFirstEpoch < iNumReadEpochs && dFrom > pEpochBuffer[iFirstEpoch].time; iFirstEpoch++) { //Empty loop } if (iFirstEpoch && dFrom < pEpochBuffer[iFirstEpoch].time) { iFirstEpoch--; } if (iFirstEpoch >= iNumReadEpochs) { iFirstEpoch--; } for (iLastEpoch = iFirstEpoch; iLastEpoch < iNumReadEpochs && dTo > pEpochBuffer[iLastEpoch].time; iLastEpoch++) { //Emtpy loop } if (iLastEpoch >= iNumReadEpochs) { iLastEpoch--; } *piNumEpochs = iLastEpoch - iFirstEpoch + 1; dFrom -= 0.005; dTo += 0.005; } while (*piNumEpochs < 3 && (iFirstEpoch > 0 || iLastEpoch < (iNumReadEpochs -1))); if ((*ppEpochs = new Epoch[*piNumEpochs]) == 0) { iRetVal = VAPI_IOERR_MEMORY; goto error; } memcpy (*ppEpochs, pEpochBuffer + iFirstEpoch, *piNumEpochs * sizeof (**ppEpochs)); dFrom = pEpochBuffer[iFirstEpoch].time; dTo = pEpochBuffer[iLastEpoch].time; // CAREFUL! We need to reset the epochs to the starting time // of the chunk! 
dStartTime = ((int)(dFrom * *piSampFreq))/ (double)*piSampFreq; for (i=0; i<*piNumEpochs; i++) { (*ppEpochs)[i].time -= dStartTime; } delete[] pEpochBuffer; pEpochBuffer = NULL; if ( (iRetVal = pViof->ReadSamples (dFrom, dTo, (void**)&pcBuffer, piNumSamples, 1)) != VAPI_IOERR_NOERROR) { goto error; } pViof->CloseFile (); delete pViof; pViof = 0; // Convert the samples to output format if ( (*ppnSamples = (short*)new char [*piNumSamples * VapiIO::SizeOf(VAPI_PCM16)]) == NULL) { iRetVal = VAPI_IOERR_MEMORY; goto error; } VapiIO::DataFormatConversion ((char *)pcBuffer, iSampFormat, (char*) *ppnSamples, VAPI_PCM16, *piNumSamples); delete[] pcBuffer; return 1; error: if (pViof) { pViof->CloseFile (); delete pViof; } if (pEpochBuffer) { delete[] pEpochBuffer; } if (pcBuffer) { delete[] pcBuffer; } return iRetVal; } /***************************************************************************** * CSpeakerData::SetF0Weight * *--------------------------* * Description: * ******************************************************************* PACOG ***/ void CSpeakerData::SetF0Weight (float fWeight) { m_weights.f0 = fWeight; } /***************************************************************************** * CSpeakerData::SetDurWeight * *---------------------------* * Description: * ******************************************************************* PACOG ***/ void CSpeakerData::SetDurWeight (float fWeight) { m_weights.dur = fWeight; } /***************************************************************************** * CSpeakerData::SetRmsWeight * *---------------------------* * Description: * ******************************************************************* PACOG ***/ void CSpeakerData::SetRmsWeight (float fWeight) { m_weights.rms = fWeight; } /***************************************************************************** * CSpeakerData::SetLklWeight * *---------------------------* * Description: * ******************************************************************* PACOG ***/ void 
CSpeakerData::SetLklWeight (float fWeight) { m_weights.lkl = fWeight; } /***************************************************************************** * CSpeakerData::SetContWeight * *----------------------------* * Description: * ******************************************************************* PACOG ***/ void CSpeakerData::SetContWeight (float fWeight) { m_weights.cont = fWeight; } /***************************************************************************** * CSpeakerData::SetSameWeight * *----------------------------* * Description: * ******************************************************************* PACOG ***/ void CSpeakerData::SetSameWeight (float fWeight) { m_weights.sameSeg = fWeight; } /***************************************************************************** * CSpeakerData::SetPhBdrWeight * *------------------------------* * Description: * ******************************************************************* WD ***/ void CSpeakerData::SetPhBdrWeight (float fWeight) { m_weights.phBdr = fWeight; } /***************************************************************************** * CSpeakerData::SetF0BdrWeight * *------------------------------* * Description: * ******************************************************************* WD ***/ void CSpeakerData::SetF0BdrWeight (float fWeight) { m_weights.f0Bdr = fWeight; } /***************************************************************************** * CSpeakerData::GetWeights * *--------------------------* * Description: * ******************************************************************* PACOG ***/ Weights CSpeakerData::GetWeights () { return m_weights; }