windows-nt/Source/XPSP1/NT/enduser/speech/sapi/cpl/silence.h
2020-09-26 16:20:57 +08:00

114 lines
3.8 KiB
C++

/*********************************************************************
Silence.H - Includes to use the code to detect silence.
begun 5/14/94 by Mike Rozak
*/
#ifndef _SILENCE_H_
#define _SILENCE_H_
// QWORD / PQWORD: unsigned 64-bit integer and pointer to it (used in this
// header for block time stamps). The typedef is skipped when _SPEECH_ is
// defined; presumably the header that defines _SPEECH_ already supplies
// QWORD -- TODO confirm, that header is not visible here.
#ifndef _SPEECH_
typedef unsigned _int64 QWORD, *PQWORD;
#endif
/*********************************************************************
Constants and typedefs */
// Silence-state codes (yes / no / unknown). Where they are produced and
// consumed is not visible in this header.
#define SIL_YES (2)
#define SIL_NO (0)
#define SIL_UNKNOWN (1)
// #define SIL_SAMPRATE (11025) // assumed sampling rate
// NOTE(review): the PH / PC prefixes presumably mean "phone" and "PC",
// matching the comments on PHMAXVOICEHZ / PCMAXVOICEHZ below -- confirm.
#define PHADD_BEGIN_SILENCE (4) // 1/4 second
#define PCADD_BEGIN_SILENCE (4) // 1/4 second
#define FILTERNUM (1024) // max # samples in the filter
// Disabled older 300 Hz limits, kept for reference; the active 500 Hz
// definitions follow the #endif.
#if 0
#define MAXVOICEHZ (300) // maximum voice pitch in hz
#define PHMAXVOICEHZ (300) // maximum voice pitch in hz (phone)
#endif
#define PHMAXVOICEHZ (500) // maximum voice pitch in hz (phone)
#define PCMAXVOICEHZ (500) // maximum voice pitch in hz (PC)
#define MINVOICEHZ (50) // minimum voice pitch in hz
// BLOCKCHAR - characteristics recorded for one block of samples.
// NOTE(review): the individual field meanings are not documented in this
// header; going by the names, the WORD fields hold a maximum level and a
// maximum delta and the BYTE fields are flags -- confirm against the
// implementation before relying on that.
typedef struct {
    WORD wMaxLevel, wMaxDelta;
    BYTE bIsVoiced, bHighLevel, bHighDelta;
} BLOCKCHAR;
typedef BLOCKCHAR *PBLOCKCHAR;
// BINFO - bookkeeping for one queued block of samples.
typedef struct {
    short *pSamples;       // the block's sample data; NULL marks an empty slot
    DWORD  dwNumSamples;   // how many samples the block holds
    QWORD  qwTimeStamp;    // time stamp associated with the block
} BINFO;
typedef BINFO *PBINFO;
// CSilence - silence / utterance detection over blocks of 16-bit samples.
//
// Only the three inline accessors are defined in this header; every other
// member is implemented elsewhere, so the notes on those declarations are
// limited to what their signatures show.
class CSilence {
   private:
      WORD        m_wBlocksPerSec;     // blocks per second; used by ReactionTimeSet()
      WORD        m_wBlocksInQueue;
      WORD        m_wLatestBlock;      // points to the last block entered in the circular list
      PBINFO      m_paBlockInfo;       // block records (see BINFO)
      DWORD       m_dwSoundBits;
      DWORD       m_dwVoicedBits;      // turned on if block was voiced
      BLOCKCHAR   m_bcSilence;         // what silence is
      BOOL        m_fFirstBlock;       // TRUE if the next block is the first
                                       // block ever, and used to judge silence, else FALSE
      BOOL        m_fInUtterance;      // TRUE if we're in an utterance
      DWORD       m_dwUtteranceLength; // number of frames that utterance has gone on
      WORD        m_wReaction;         // reaction time, in blocks (see ReactionTimeSet)
      WORD        m_wNoiseThresh;      // noise threshold
      short       *m_pASFiltered;      // pointer to filtered data buffer
      WORD        m_wAddSilenceDiv;
      DWORD       m_dwHighFreq;
      DWORD       m_dwSamplesPerSec;
#ifdef USE_REG_ENG_CTRL
      // Extra tuning state, compiled in only when USE_REG_ENG_CTRL is defined.
      BOOL        m_fSilenceDetectEnbl;
      BOOL        m_fVoiceDetectEnbl;
      WORD        m_wTimeToCheckDiv;
      DWORD       m_dwLowFreq;
      DWORD       m_dwCheckThisManySamples;
      DWORD       m_dwNumFilteredSamples;
      WORD        m_wMinConfidenceAdj;
      DWORD       m_dwLPFShift;
      DWORD       m_dwLPFWindow;
#endif

   public:
      CSilence (WORD wBlocksPerSec);
      ~CSilence (void);

      // NOTE(review): presumably must be called before AddBlock/GetBlock;
      // the BOOL result presumably reports success -- confirm in the
      // implementation.
      BOOL Init (BOOL fPhoneOptimized, DWORD dwSamplesPerSec);

      // Hands a block of samples (with its time stamp) to the detector.
      // wVU is a pointer, so it is presumably an output value -- confirm.
      BOOL AddBlock (short * pSamples, DWORD dwNumSamples, WORD * wVU,
         QWORD qwTimeStamp);

      // Returns a pointer to sample data; the sample count and time stamp
      // are passed back through the two pointer parameters.
      // NOTE(review): ownership of the returned buffer is not visible here.
      short * GetBlock (DWORD * pdwNumSamples, QWORD * pqwTimeStamp);

      void KillUtterance (void);

      // Stores wValue as the noise threshold.
      void NoiseResistSet (WORD wValue)
         {
         m_wNoiseThresh = wValue;
         }

      // Converts dwTime to a block count (dwTime * blocks-per-second / 1000)
      // and stores it, truncated to WORD, as the reaction time. The divide
      // by 1000 suggests dwTime is in milliseconds -- TODO confirm.
      void ReactionTimeSet (DWORD dwTime)
         {
         m_wReaction = (WORD) ((dwTime * m_wBlocksPerSec) / 1000);
         }

      // Returns the wMaxLevel field of the current silence characteristics.
      WORD GetBackgroundNoise (void)
         {
         return m_bcSilence.wMaxLevel;
         }

      void ExpectNoiseChange (WORD wValue);

   private:
      // Declared without the redundant "CSilence::" qualifier: a qualified
      // name on an in-class member declaration is ill-formed in standard
      // C++ and is rejected by conforming compilers. Out-of-class
      // definitions (CSilence::IsSegmentVoiced etc.) are unaffected.
      BOOL IsSegmentVoiced (short *pSamples, DWORD dwNumSamples,
         DWORD dwSamplesPerSec, WORD wMinConfidence, short *asFiltered);
      BOOL WhatsTheNewState (DWORD dwSoundBits, DWORD dwVoicedBits,
         BOOL fWasInUtterance, BOOL fLongUtterance,
         WORD wBlocksPerSec, WORD *wStarted, WORD wReaction);
};
// PCSilence: pointer-to-CSilence alias.
typedef CSilence *PCSilence;
// Free function (not a CSilence member), declared with the NEAR PASCAL
// calling-convention macros and defined outside this header.
// NOTE(review): from the name and signature it presumably examines dwNum
// samples starting at lpS and returns an amplitude value -- confirm
// against the definition.
WORD NEAR PASCAL TrimMaxAmp(short * lpS, DWORD dwNum);
#endif // _SILENCE_H_