windows-nt/Source/XPSP1/NT/enduser/speech/tts/tools/filedist/main.cpp
2020-09-26 16:20:57 +08:00

379 lines
9.7 KiB
C++

/*
*===========================================================================
*
* main.c
*
* This material contains unpublished, proprietary software of
* Entropic, Inc. Any reproduction, distribution, or publication
* of this work must be authorized in writing by Entropic, Inc.,
* and must bear the notice:
*
* "Copyright (c) 1998 Entropic, Inc. All rights reserved"
*
* The copyright notice above does not evidence any actual or intended
* publication of this source code.
*
* rcs_id: $Id: main.c,v 1.1 1999/10/12 19:44:42 galanes Exp $
*
*
*
*===================================================mplumpe 12/19/00========================
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <sigproc.h>
#include <math.h>
#include "viterbi.h"
// Fixed-size character buffer used for the file names taken from the command
// line: room for _MAX_PATH characters plus a terminating '\0'.
typedef char TName[_MAX_PATH+1];
// Prints the command-line usage message to stderr.
#define SYNTAX fprintf(stderr, "USAGE: fileDist file1 file2 (DTW) (DELTA:wt) (output.txt)\n\n")
// Forward declarations of the helpers defined after main().
double* ReadInputFile (char *fName, int *nFrames, int *frameSize);
int *FindOptimalPath (double *adOriginal, int iOrigLen, double *adSynth, int iSynthLen, int iDim);
// Globals for the cost functions (ConcatCost / UnitCost). The callbacks receive
// no context argument, so FindOptimalPath() publishes its inputs here before
// running the Viterbi search.
// Largest forward step (in synth frames) ConcatCost accepts between two elements.
int giMaxShift=-1;
// Number of frames in the original sequence (checked by UnitCost's assert).
int giOrigLen=-1;
// Number of frames in the synthesized sequence (checked by UnitCost's assert).
int giSynthLen=-1;
// Number of values per frame.
int giDim=-1;
// Frame data of the original sequence, giDim values per frame.
double *gadOriginal=NULL;
// Frame data of the synthesized sequence, giDim values per frame.
double *gadSynth=NULL;
// Cost multiplier per shift, indexed 0..giMaxShift; allocated and freed by FindOptimalPath().
float *gafConcatCosts=NULL;
/*
*-----------------------------------------------------------------------------
*
* MAIN
*
* Computes the average per-frame distance between two feature files and
* prints it with "%f" to stdout, or to an output file when one is named.
*
* Usage: fileDist file1 file2 (DTW) (DELTA:wt) (output.txt)
*
*   file1, file2  input files in the format read by ReadInputFile();
*                 file1 is treated as the original.
*   DTW           align file2 to file1 with FindOptimalPath() instead of
*                 comparing frame i with frame i.
*   DELTA:wt      also add wt times the distance between the frame deltas
*                 (frame i+1 minus frame i-1).
*   output.txt    file that receives the result instead of stdout.
*
* With three extra arguments they must appear in the order shown above.
* With one or two, an argument that is neither "DTW" nor "DELTA:..." is
* taken as the output file name (see the parsing code for exact rules).
*
* Returns 0 on success and 1 on a usage error, an unreadable input file,
* mismatched frame sizes, an allocation failure or an output file error.
*
*-----------------------------------------------------------------------------
*/
int
main(int argc, char **argv)
{
  /* Everything is declared (and initialised) up front so that the error
     paths can jump to the single cleanup label below. */
  FILE*   output = stdout;
  TName   fName1 = "";   /* zero-filled, so strncpy(.., _MAX_PATH) stays terminated */
  TName   fName2 = "";
  TName   fName3 = "";
  double* data1 = NULL;
  double* data2 = NULL;
  double* adDelta1 = NULL;
  double* adDelta2 = NULL;
  int*    aiOptimalPath = NULL;
  int     nFrames1 = 0;
  int     nFrames2 = 0;
  int     frameSize1 = 0;
  int     frameSize2 = 0;
  double  distance = 0.0;
  int     i, j;
  int     cnt = 0;
  int     status = 1;
  bool    fDTW = false;
  bool    fDeltaDist = false;
  double  dDeltaScale = 1.;

  if ( argc < 3 || argc > 6 )
  {
    SYNTAX;
    return 1;
  }

  strncpy (fName1, argv[1], _MAX_PATH);
  strncpy (fName2, argv[2], _MAX_PATH);

  if (argc == 4)
  {
    /* one optional argument: DTW, DELTA:wt or the output file */
    if (0 == strcmp (argv[3], "DTW"))
    {
      fDTW = true;
    }
    else if (0 == strncmp (argv[3], "DELTA:", 6))
    {
      fDeltaDist = true;
      dDeltaScale = atof(argv[3]+6);
    }
    else
    {
      strncpy (fName3, argv[3], _MAX_PATH);
    }
  }
  else if (argc == 5)
  {
    if (0 == strcmp (argv[3], "DTW"))
    {
      /* DTW followed by either DELTA:wt or the output file */
      fDTW = true;
      if (0 == strncmp (argv[4], "DELTA:", 6))
      {
        fDeltaDist = true;
        dDeltaScale = atof(argv[4]+6);
      }
      else
      {
        strncpy (fName3, argv[4], _MAX_PATH);
      }
    }
    else
    {
      /* the last argument is always the output file here */
      if (0 == strncmp (argv[3], "DELTA:", 6))
      {
        fDeltaDist = true;
        dDeltaScale = atof(argv[3]+6);
      }
      strncpy (fName3, argv[4], _MAX_PATH);
    }
  }
  else if (argc == 6)
  {
    if (0 == strcmp (argv[3], "DTW"))
    {
      fDTW = true;
    }
    if (0 == strncmp (argv[4], "DELTA:", 6))
    {
      fDeltaDist = true;
      dDeltaScale = atof(argv[4]+6);
    }
    strncpy (fName3, argv[5], _MAX_PATH);
  }

  /*
   * read data
   */
  data1 = ReadInputFile(fName1, &nFrames1, &frameSize1);
  data2 = ReadInputFile(fName2, &nFrames2, &frameSize2);
  if (data1 == NULL || data2 == NULL)
  {
    /* ReadInputFile has already reported the reason on stderr */
    goto cleanup;
  }

  if (frameSize1 != frameSize2)
  {
    fprintf(stderr, "Different data order between %s %s\n", fName1, fName2);
    goto cleanup;
  }

  if (fDTW)
  {
    /* FindOptimalPath pins the first two and last two frames of each
       sequence and divides by the original length, so it needs at least
       two frames on both sides. */
    if (nFrames1 < 2 || nFrames2 < 2)
    {
      fprintf(stderr, "Too few frames for DTW alignment\n");
      goto cleanup;
    }
    //
    // Find the optimal path - assumes the original is fName1
    //
    aiOptimalPath = FindOptimalPath (data1, nFrames1, data2, nFrames2, frameSize1);
    if (aiOptimalPath == NULL)
    {
      fprintf(stderr, "Can not find alignment path\n");
      goto cleanup;
    }
  }
  else
  {
    /* no alignment: compare frame i with frame i over the shorter file */
    if (nFrames2 < nFrames1)
    {
      nFrames1 = nFrames2;
    }
    /* request at least one element so an empty file is not mistaken
       for an allocation failure */
    aiOptimalPath = (int *)malloc (sizeof(int)*(nFrames1 > 0 ? nFrames1 : 1));
    if (aiOptimalPath == NULL)
    {
      fprintf(stderr, "Can not alloc memory \n");
      goto cleanup;
    }
    for (i=0; i < nFrames1; i++)
    {
      aiOptimalPath[i] = i;
    }
  }

  //
  // Find the distance between the optimal path & the original
  //
  if (!fDeltaDist)
  {
    for (i = 0; i < nFrames1 ; i++)
    {
      /* only use voiced segments (judged by the first value of either frame) */
      if (data1[i * frameSize1] > 0.8 || data2[aiOptimalPath[i] * frameSize2] > 0.8)
      {
        distance += EuclideanDist(&data1[i * frameSize1], &data2[aiOptimalPath[i] * frameSize2], frameSize1);
        cnt++;
      }
    }
  }
  else // fDeltaDist
  {
    // just skip the first and last frames.  These are surely silence and don't matter anyway.
    // This makes delta calculations easier
    adDelta1 = (double *)malloc (sizeof(double)*frameSize1);
    adDelta2 = (double *)malloc (sizeof(double)*frameSize1);
    if (adDelta1 == NULL || adDelta2 == NULL)
    {
      fprintf(stderr, "Can not alloc memory \n");
      goto cleanup;
    }
    for (i = 1; i < nFrames1-1 ; i++)
    {
      /* only use voiced segments (judged by the first value of either frame) */
      if (data1[i * frameSize1] > 0.8 || data2[aiOptimalPath[i] * frameSize2] > 0.8)
      {
        distance += EuclideanDist(&data1[i * frameSize1], &data2[aiOptimalPath[i] * frameSize2], frameSize1);
        /* delta = next frame minus previous frame, taken around the
           aligned position in the second file */
        for (j=0; j < frameSize1; j++)
        {
          adDelta1[j] = data1[(i+1)*frameSize1+j] - data1[(i-1)*frameSize1+j];
          adDelta2[j] = data2[(aiOptimalPath[i]+1)*frameSize1+j] - data2[(aiOptimalPath[i]-1)*frameSize1+j];
        }
        distance += dDeltaScale * EuclideanDist(adDelta1, adDelta2, frameSize1);
        cnt++;
      }
    }
  }

  /* average over the frames that were actually compared */
  if (cnt > 0)
  {
    distance /= cnt;
  }

  /*
   * write result
   */
  if (fName3[0])
  {
    if( (output = fopen(fName3, "wt")) == NULL)
    {
      fprintf(stderr, "Can not open file %s\n", fName3);
      goto cleanup;
    }
  }
  fprintf(output, "%f", distance);
  /* only close what was opened here; stdout is flushed at exit */
  if (output != stdout)
  {
    if (fclose(output) != 0)
    {
      fprintf(stderr, "Can not write file %s\n", fName3);
      goto cleanup;
    }
  }
  status = 0;

cleanup:
  /* free(NULL) is a no-op, so partially completed runs are handled too */
  free (adDelta1);
  free (adDelta2);
  free (aiOptimalPath);
  free (data1);
  free (data2);
  return status;
}
/*
*-----------------------------------------------------------------------------
*
* Read spectral data
*
* Reads a binary file made of three ints (a type tag, the frame size and
* the number of frames, in that order) followed by
* frameSize * nFrames doubles stored frame after frame.
*
* fName      path of the file to read.
* nFrames    receives the frame count from the header.
* frameSize  receives the number of doubles per frame from the header.
*
* Returns a malloc'ed array of frameSize * nFrames doubles that the
* caller must free(), or NULL after printing a message to stderr when the
* file cannot be opened, the header is missing or invalid, memory cannot
* be allocated, or the file holds fewer values than the header announces.
*
*-----------------------------------------------------------------------------
*/
double *
ReadInputFile (char *fName, int *nFrames, int *frameSize)
{
  FILE*   fp;
  double* data;
  int     type;      /* header tag; read to advance past it, otherwise unused */
  size_t  nValues;

  if( (fp = fopen(fName, "rb")) == NULL)
  {
    fprintf(stderr, "Can not open file %s\n", fName);
    return NULL;
  }

  /* read header */
  if (fread(&type, sizeof(int), 1, fp) != 1 ||
      fread(frameSize, sizeof(int), 1, fp) != 1 ||
      fread(nFrames, sizeof(int), 1, fp) != 1)
  {
    fprintf(stderr, "Can not read header of file %s\n", fName);
    fclose(fp);
    return NULL;
  }

  /* Reject sizes that cannot describe real data, and make sure the byte
     count below cannot wrap around.  A frame count of zero is accepted. */
  if (*frameSize <= 0 || *nFrames < 0 ||
      (size_t)*nFrames > ((size_t)-1) / sizeof(double) / (size_t)*frameSize)
  {
    fprintf(stderr, "Bad header in file %s\n", fName);
    fclose(fp);
    return NULL;
  }
  nValues = (size_t)*frameSize * (size_t)*nFrames;

  /* alloc memory (at least one element, so that an empty file does not
     look like an allocation failure where malloc(0) returns NULL) */
  data = (double *)malloc((nValues > 0 ? nValues : 1) * sizeof(double));
  if (data == NULL)
  {
    fprintf(stderr, "Can not alloc memory \n");
    fclose(fp);
    return NULL;
  }

  /* read cep data: the frames are contiguous, so one read covers them all */
  if (fread(data, sizeof(double), nValues, fp) != nValues)
  {
    fprintf(stderr, "Can not read data from file %s\n", fName);
    free(data);
    fclose(fp);
    return NULL;
  }

  fclose(fp);
  return data;
}
//
// Transition cost callback handed to Viterbi.FindBestPath by FindOptimalPath.
//
// pElem1, pElem2  element tags of two path candidates.  They are not real
//                 pointers: FindOptimalPath stores (synth frame index + 1)
//                 in them.  Presumably pElem1 belongs to the earlier
//                 position and pElem2 to the later one - confirm against
//                 CViterbi.
// fUnitCost       cost value supplied by the caller; scaled by the shift
//                 weight for an allowed transition.
//
// Returns 9e9 (effectively "forbidden") when the step goes backwards or
// jumps ahead by more than giMaxShift frames, otherwise
// fUnitCost * gafConcatCosts[shift].
//
float _cdecl ConcatCost (const void *pElem1, const void *pElem2, float fUnitCost)
{
  // Recover the integer tags.  Going through size_t keeps the conversion
  // well-formed where pointers are wider than int.
  int i1 = (int)(size_t)pElem1;
  int i2 = (int)(size_t)pElem2;

  // check if it is a valid concat option
  if (i1 > i2)
  {
    return 9E9f;
  }
  else if (i2-i1 > giMaxShift)
  {
    return 9e9f;
  }
  else
  {
    return fUnitCost*gafConcatCosts[i2-i1];
  }
}
//
// Per-position cost callback handed to Viterbi.FindBestPath by FindOptimalPath.
//
// pElem1    element tag of a candidate; not a real pointer but
//           (synth frame index + 1), as stored by FindOptimalPath.
// iOrigPos  index of the frame in the original sequence.
//
// Returns the value of EuclideanDist for original frame iOrigPos and the
// candidate synth frame, each giDim values long, converted to float.
//
float _cdecl UnitCost (const void *pElem1, const int iOrigPos)
{
  // Recover the integer tag.  Going through size_t keeps the conversion
  // well-formed where pointers are wider than int.
  int iSynthPos = (int)(size_t)pElem1;

  // tags are one greater than the frame index (0 is reserved by the caller)
  iSynthPos--;

  assert ((iOrigPos >=0) && (iOrigPos < giOrigLen));
  assert ((iSynthPos >=0) && (iSynthPos < giSynthLen));

  // Just the Euclidean distance
  return (float)EuclideanDist(gadOriginal + iOrigPos * giDim, gadSynth + iSynthPos * giDim, giDim);
}
//
// Aligns the synthesized sequence to the original one with a Viterbi search.
//
// adOriginal  frames of the original sequence, iDim values per frame.
// iOrigLen    number of original frames.
// adSynth     frames of the synthesized sequence, iDim values per frame.
// iSynthLen   number of synthesized frames.
// iDim        number of values per frame.
//
// Returns a malloc'ed array of iOrigLen ints, where entry i is the
// synthesized frame index chosen for original frame i; the caller must
// free() it.  Returns NULL (after a message on stderr) when either
// sequence has fewer than two frames or memory cannot be allocated.
//
// Side effects: overwrites the gi*/ga* globals read by ConcatCost and
// UnitCost.  gafConcatCosts is allocated here and released before returning.
//
int *FindOptimalPath (double *adOriginal, int iOrigLen, double *adSynth, int iSynthLen, int iDim)
{
  //
  // Put the appropriate vectors into Viterbi, then call it
  //
  int i, j;
  CViterbi Viterbi;
  float fCost, fMidShift;
  int *aiPath;
  int iStart, iStop;

  // The endpoint constraints below touch positions 0, 1, iOrigLen-2 and
  // iOrigLen-1 and synth frames 0, 1, iSynthLen-2 and iSynthLen-1, and
  // iOrigLen is used as a divisor, so shorter inputs cannot be aligned.
  if (iOrigLen < 2 || iSynthLen < 2)
  {
    fprintf(stderr, "Too few frames for DTW alignment\n");
    return NULL;
  }

  //
  // Find ConcatCosts
  //
  // A step may advance by up to roughly twice the average rate; the cost
  // multiplier is 1 at the average rate and grows linearly away from it.
  giMaxShift = 2*iSynthLen/iOrigLen + 1;
  fMidShift=(float)iSynthLen/(float)iOrigLen;
  gafConcatCosts = (float *)malloc (sizeof(float)*(giMaxShift+1));
  if (gafConcatCosts == NULL)
  {
    fprintf(stderr, "Can not alloc memory \n");
    return NULL;
  }
  for (i=0; i <= giMaxShift; i++)
  {
    gafConcatCosts[i] = (float )(1.f+fabs(fMidShift-i)/fMidShift);
  }

  // publish the inputs for the cost callbacks
  giOrigLen   = iOrigLen;
  giSynthLen  = iSynthLen;
  giDim       = iDim;
  gadOriginal = adOriginal;
  gadSynth    = adSynth;

  // second argument presumably bounds the candidates per position - confirm against CViterbi
  Viterbi.Init (iOrigLen, 51);

  // The passed in position always must be one greater, because 0 is a special tag for the viterbi algorithm
  // (the integer goes through size_t so the conversion to void* is well-formed
  //  where pointers are wider than int)
  // Add endpoint constraints
  Viterbi.Add (0, (void *)(size_t)1);
  Viterbi.Add (iOrigLen-1, (void *)(size_t)iSynthLen);
  // Add one more constraint to allow delta calculation
  Viterbi.Add (1, (void *)(size_t)2);
  Viterbi.Add (iOrigLen-2, (void *)(size_t)(iSynthLen-1));
  // add intermediate options
  for (i=2; i <= iOrigLen-3; i++)
  {
    // for now, add in 25 frames on either side (a window of .51 seconds total) of average
    iStart = (int)(i*fMidShift-25);
    iStop  = (int)(i*fMidShift+25);
    if (iStart < 0)
    {
      iStart = 0;
    }
    if (iStop > iSynthLen)
    {
      iStop=iSynthLen;
    }
    // switch to the one-based tags used by the callbacks
    iStart += 1;
    for (j=iStart; j <= iStop; j++)
    {
      Viterbi.Add (i, (void *)(size_t)j);
    }
  }

  Viterbi.FindBestPath (ConcatCost, UnitCost, &fCost);

  //
  // Best path now in void ** Viterbi.m_rgpBestElems
  //
  // The search is over, so the callbacks no longer need the shift table;
  // clear the global so that it does not dangle.
  free (gafConcatCosts);
  gafConcatCosts = NULL;

  aiPath = (int *)malloc (sizeof(int)*iOrigLen);
  if (aiPath == NULL)
  {
    fprintf(stderr, "Can not alloc memory \n");
    return NULL;
  }
  for (i=0; i < iOrigLen; i++)
  {
    // undo the +1 applied when the candidates were added
    aiPath[i] = (int)(size_t)(Viterbi.m_rgpBestElems[i]) - 1;
  }
  return aiPath;
}