/******************************************************************************
* trees.cpp *
*-----------*
*
*------------------------------------------------------------------------------
* Copyright (c) 1997 Entropic Research Laboratory, Inc.
* Copyright (C) 1998 Entropic, Inc
* Copyright (C) 2000 Microsoft Corporation Date: 03/02/00 - 12/5/00
* All Rights Reserved
*
********************************************************************* mplumpe was PACOG ***/

#include "trees.h"
#include "list.h"
#include "clusters.h"
// NOTE(review): the header names of the next two directives are missing from
// this copy of the file (text between angle brackets appears to have been
// stripped). Restore them from version control; this file calls assert,
// isspace/isdigit, the str* functions, sprintf/printf/fgets and atoi.
#include
#include

#define MAX_QS_LEN 128
#define MAX_LINE 512

//----------------------------------------------------------
// Pattern text used by the question classes.
// Stores up to MAX_QS_LEN-1 characters of pattern; Evaluate() tests a
// string against it.
//
class CRegExp
{
    public:
        CRegExp ();
        CRegExp (const char* string);
        bool Evaluate(const char* pszString);
    private:
        char m_text[MAX_QS_LEN];
};

//----------------------------------------------------------
// Question set classes
//
// CQuest holds the list of expressions that make up one question.
//
class CQuest
{
    public:
        // Copies the expression list; needed because questions are stored by value.
        CQuest& operator= (CQuest& rSrc)
        {
            m_pExpr = rSrc.m_pExpr;
            return *this;
        }
        int AddExpression (const char* pszLine);
        bool Matches (const char* pszString);
#ifdef _DEBUG_
        void Debug();
#endif
    private:
        // NOTE(review): the CList element type is missing from this copy of the
        // file (presumably CRegExp -- confirm against version control).
        CList m_pExpr;
};

//----------------------------------------------------------
// CQuestSet: the collection of questions, looked up by question tag.
// LoadFromFile feeds it the text following "QS " and sorts it once
// loading is complete.
//
class CQuestSet
{
    public:
        bool Matches (const char* pszQuestTag, const char* pszTriph);
        bool AddQuestion ( const char* pszLine);
        void Sort();
#ifdef _DEBUG_
        void Debug();
#endif
    private:
        // NOTE(review): the CList element type is missing from this copy of the
        // file (presumably CQuest -- confirm against version control).
        CList m_pQuest;
};

//----------------------------------------------------------
// Tree classes
//
// CLeave: a terminal node of a tree; holds the leaf value as a
// fixed-size string of at most MAX_QS_LEN-1 characters.
//
class CLeave
{
    public:
        CLeave () {m_pszLeave[0] = '\0';};
        CLeave (const char* pszLeaveValue);
        const char* Value();
    private:
        char m_pszLeave[MAX_QS_LEN];
};

//----------------------------------------------------------
// CBranch: an internal node of a tree; holds a question name and the
// indices of its two children. CTree::Traverse treats an index >= 0 as a
// position in the terminal list and a negative index as the negated
// position in the branch list.
//
class CBranch
{
    public:
        CBranch ()
        {
            m_pszQuestion[0] = '\0';
            m_iLeft = 0;
            m_iRight = 0;
        }
        CBranch( const char* pszQuestion, int iLeft, int iRight);
        int Left();
        int Right();
        const char* Question();
    private:
        char m_pszQuestion[MAX_QS_LEN];
        int m_iLeft;
        int m_iRight;
};

//----------------------------------------------------------
// CTree: one decision tree, stored as a list of branches and a list of
// terminals. AddNode() appends nodes parsed from text, Traverse() walks
// the tree for a triphone.
//
class CTree
{
    public:
        // Copies both node lists; needed because trees are stored by value.
        CTree& operator= (CTree& rSrc)
        {
            m_branches = rSrc.m_branches;
            m_terminals = rSrc.m_terminals;
            return *this;
        }
        int AddNode( const char* pszLine);
        const char* Traverse(CQuestSet* pQuestSet, const char* pszTriphone);
#ifdef _DEBUG_
        void Debug();
#endif
    private:
        // NOTE(review): the CList element types are missing from this copy of
        // the file. AddNode pushes CBranch objects onto m_branches and CLeave
        // objects onto m_terminals.
        CList m_branches;
        CList m_terminals;
};

//----------------------------------------------------------
// CClustTreeImp: the CClustTree implementation returned by
// CClustTree::ClassFactory(). Owns the question set and the list of
// trees filled in by LoadFromFile().
//
// The base class is inherited publicly: ClassFactory() is a member of
// CClustTree and converts a CClustTreeImp* to CClustTree*, which requires
// the base to be accessible from outside this class.
//
class CClustTreeImp : public CClustTree
{
    public:
        // Start with no question set so that the destructor's delete is safe
        // even when LoadFromFile() was never called.
        CClustTreeImp() : m_pQuestSet(0) {}
        ~CClustTreeImp();
        int LoadFromFile (FILE* fp);
        int GetNumStates (const char* pszTriphone);
        const char* TriphoneToCluster(const char* pszTriphone, int iState);
#ifdef _DEBUG_
        void Debug();
#endif
    private:
        int ParseTree (const char* pszLine);
        int CentralPhone (const char *pszTriphone, char *pszThone);

        CQuestSet* m_pQuestSet;
        // NOTE(review): the CList element type is missing from this copy of the
        // file. ParseTree pushes CTree objects onto m_trees.
        CList m_trees;
};

/*****************************************************************************
* CLeave::CLeave *
*----------------*
*   Description:
*       Builds a leaf holding a copy of pszLeaveValue. The text is truncated
*       to MAX_QS_LEN-1 characters so it can never overrun m_pszLeave; a null
*       pointer is stored as an empty string.
*
******************************************************************* PACOG ***/
CLeave::CLeave (const char* pszLeaveValue)
{
    strncpy(m_pszLeave, pszLeaveValue ? pszLeaveValue : "", MAX_QS_LEN - 1);
    m_pszLeave[MAX_QS_LEN - 1] = '\0';
}

/*****************************************************************************
* CLeave::Value *
*---------------*
*   Description:
*       Returns the leaf text. The pointer refers to storage inside this
*       object.
*
******************************************************************* PACOG ***/
const char* CLeave::Value()
{
    return m_pszLeave;
}

/*****************************************************************************
* CBranch::CBranch *
*------------------*
*   Description:
*       Builds a branch from a question name and two child indices. The
*       question name is truncated to MAX_QS_LEN-1 characters so it can never
*       overrun m_pszQuestion; a null pointer is stored as an empty string.
*
******************************************************************* PACOG ***/
CBranch::CBranch( const char* pszQuestion, int iLeft, int iRight)
{
    strncpy(m_pszQuestion, pszQuestion ? pszQuestion : "", MAX_QS_LEN - 1);
    m_pszQuestion[MAX_QS_LEN - 1] = '\0';
    m_iLeft = iLeft;
    m_iRight = iRight;
}

/*****************************************************************************
* CBranch::Left *
*---------------*
*   Description:
*       Returns the left-child index stored in this branch.
*
******************************************************************* PACOG ***/
int CBranch::Left()
{
    return m_iLeft;
}

/*****************************************************************************
*
CBranch::Right * *----------------* * Description: * ******************************************************************* PACOG ***/ int CBranch::Right() { return m_iRight; } /***************************************************************************** * CBranch::Question * *-------------------* * Description: * ******************************************************************* PACOG ***/ const char* CBranch::Question() { return m_pszQuestion; } /***************************************************************************** * CClustTree::ClassFactory * *--------------------------* * Description: * ******************************************************************* PACOG ***/ CClustTree* CClustTree::ClassFactory () { return new CClustTreeImp; } /***************************************************************************** * CClustTreeImp::~CClustTreeImp * *-------------------------------* * Description: * ******************************************************************* PACOG ***/ CClustTreeImp::~CClustTreeImp () { delete m_pQuestSet; } /***************************************************************************** * CClustTreeImp::LoadFromFile * *-----------------------------* * Description: * ******************************************************************* PACOG ***/ int CClustTreeImp::LoadFromFile (FILE* fp) { char line[MAX_LINE+1]; char *ptr; assert (fp); if ((m_pQuestSet = new CQuestSet) == 0) { return 0; } while (fgets(line, MAX_LINE, fp) && line[0]!='#') { if (line[strlen(line)-1]=='\r' || line[strlen(line)-1]=='\n') { line[strlen(line)-1]= '\0'; } ptr = line; while (*ptr && isspace (*ptr)) { ptr++; } if (strncmp(ptr, "QS ", 3)==0) { if (!m_pQuestSet->AddQuestion (ptr+3)) { return 0; } } else { if (!ParseTree (ptr)) { return 0; } } } m_pQuestSet->Sort(); m_trees.Sort(); #ifdef _DEBUG_ Debug(); #endif return 1; } /***************************************************************************** * CClustTreeImp::GetNumStates * *-----------------------------* * 
Description: * ******************************************************************* PACOG ***/ int CClustTreeImp::GetNumStates(const char* triphone) { char triphHtk[20]; char centralPhone[10]; char stateName[20]; int stateCount = 0; strcpy(triphHtk, triphone); if ( CentralPhone(triphHtk, centralPhone) ) { for (stateCount = 0; stateCount<3; stateCount++) { sprintf(stateName, "%s[%d]", centralPhone, stateCount+2); CTree* tree; if ( ! m_trees.Find (stateName, &tree) ) { break; } } } return stateCount; } /***************************************************************************** * CClustTreeImp::TriphoneToCluster * *----------------------------------* * Description: * ******************************************************************* PACOG ***/ const char *CClustTreeImp::TriphoneToCluster (const char *triphone, int state) { char centralPhone[10]; char stateName[20]; char triphHtk[20]; assert (triphone); assert (0<=state && state<3); strcpy(triphHtk, triphone); if ( CentralPhone(triphHtk, centralPhone) ) { sprintf(stateName, "%s[%d]", centralPhone, state+2); CTree* tree = 0; if ( m_trees.Find (stateName, &tree) ) { return tree->Traverse(m_pQuestSet, triphHtk); } } return 0; } /***************************************************************************** * CClustTreeImp::CentralPhone * *-----------------------------* * Description: * ******************************************************************* PACOG ***/ int CClustTreeImp::CentralPhone (const char *triphone, char *phone) { char *index1; char *index2; assert (phone); assert (triphone); index1 = strchr(triphone, '-'); if (index1) { index2 = strchr (++index1, '+'); } if ( index1 && index2 ) { strncpy ( phone, index1, index2-index1); phone[index2-index1] = '\0'; return 1; } return 0; } /***************************************************************************** * CClustTreeImp::ParseTree * *--------------------------* * Description: * ******************************************************************* PACOG ***/ 
int CClustTreeImp::ParseTree (const char *ptr) { static int newTree = 1; assert (ptr); if (!strlen (ptr)) { newTree = 1; } else if (strncmp(ptr,"{",1)==0) { newTree = 0; } else if (strncmp(ptr,"}",1)==0) { newTree = 1; } else { if (newTree ) { CTree tree; m_trees.PushBack(ptr, tree); newTree = 0; } else { m_trees.Back().AddNode(ptr); } } return 1; } /***************************************************************************** * CTree::AddNode * *----------------* * Description: * ******************************************************************* PACOG ***/ int CTree::AddNode (const char *line) { char aux1[50] = ""; char aux2[50] = ""; char *index1; char *index2; int leftIdx; int rightIdx; int i; assert (line); if (line[0]=='"') { // This is the final node (tree only has one cluster) index1 = strchr(line+1, '"'); if (index1) { strncpy(aux1, line+1, index1 - line - 1); aux1[index1 - line - 1] = '\0'; CLeave terminal(aux1); m_terminals.PushBack("", terminal); } } else { //Node name index1 = strchr(line, '\''); if (index1) { index2 = strchr(++index1, '\''); strncpy(aux1, index1, index2 - index1); aux1[index2 - index1] = '\0'; } index1 = ++index2; while (*index1 && isspace (*index1)) { index1++; } //Left node if (*index1 == '"') { index2 = strchr (++index1, '"'); strncpy(aux2, index1, index2 - index1); aux2[index2 - index1] = '\0'; CLeave terminal(aux2); m_terminals.PushBack("", terminal); leftIdx = m_terminals.Size() - 1; index1 = ++index2; } else { if (*index1 == '-') { aux2[0]= *index1++; } for (i=1 ; isdigit(*index1); i++) { aux2[i]= *index1++; } aux2[i]='\0'; leftIdx = atoi (aux2); } while (isspace(*++index1)) { //Empty loop } //Right node if (*index1 == '"') { index2 = strchr (++index1, '"'); strncpy(aux2, index1, index2 - index1); aux2[index2 - index1] = '\0'; CLeave terminal(aux2); m_terminals.PushBack("", terminal); rightIdx = m_terminals.Size() - 1; } else { if (*index1== '-') { aux2[0]= *index1++; } for (i=1; isdigit(*index1); i++) { aux2[i]= *index1++; } 
aux2[i]='\0'; rightIdx = atoi (aux2); } CBranch node(aux1, leftIdx, rightIdx); m_branches.PushBack("", node); } return 1; } /***************************************************************************** * CTree::Traverse * *-----------------* * Description: * ******************************************************************* PACOG ***/ const char *CTree::Traverse (CQuestSet* pQuestSet, const char *triph) { char *retVal = 0; int nodeIdx = 0; int nextIdx; assert (triph); if (m_branches.Size() == 0) { return m_terminals[0].Value(); } // Search until we find a leave while (!retVal) { if (nodeIdx > m_branches.Size()) { return 0; } if (pQuestSet->Matches (m_branches[nodeIdx].Question(), triph)) { nextIdx = m_branches[nodeIdx].Right(); } else { nextIdx = m_branches[nodeIdx].Left(); } if ( nextIdx >= 0) { retVal = (char *)m_terminals[nextIdx].Value(); } else { nodeIdx = -nextIdx; } } return retVal; } /***************************************************************************** * CRegExp::CRegExp * *------------------* * Description: * ******************************************************************* PACOG ***/ CRegExp::CRegExp () { m_text[0] = '\0'; } /***************************************************************************** * CRegExp::CRegExp * *------------------* * Description: * ******************************************************************* PACOG ***/ CRegExp::CRegExp (const char* regExp) { strcpy(m_text, regExp); } /***************************************************************************** * CRegExp::Evaluate * *-------------------* * Description: * ******************************************************************* PACOG ***/ bool CRegExp::Evaluate (const char *string) { const char *index1; const char *index2; int len; int i; int jump = 0; assert (string); len = strlen(m_text); index1 = string; for (i=0; iMatches (triph); } return false; } /***************************************************************************** * CQuestSet::Sort * 
*-----------------*
*   Description:
*       Sorts the question list by calling Sort() on m_pQuest.
*       LoadFromFile calls this once all lines have been read.
*
******************************************************************* PACOG ***/
void CQuestSet::Sort ()
{
    m_pQuest.Sort();
}

#ifdef _DEBUG_

/*****************************************************************************
* CClustTreeImp::Debug *
*----------------------*
*   Description:
*       Debug-only dump, compiled when _DEBUG_ is defined.
*
*       NOTE(review): this copy of the file is damaged here. The text between
*       the '<' of the loop condition and a later '>' is missing, so the end
*       of this function and the start of the one that followed it are lost.
*       The remnant is kept exactly as found.
*
*       NOTE(review): the class declares m_pQuestSet (a pointer); no member
*       named m_questionSet is declared in this file, so this would not
*       compile with _DEBUG_ defined -- confirm against version control.
*
*       NOTE(review): both printf calls for idx >= 0 index m_terminals with i
*       rather than idx; that looks unintended -- confirm.
*
******************************************************************* PACOG ***/
void CClustTreeImp::Debug ()
{
    m_questionSet.Debug();

    for (int i=0; i=0)
        {
            printf("Left= %s ", m_terminals[i].Value());
        }
        else
        {
            printf("Left= %ld ", -idx);
        }

        idx = m_branches[i].Right();
        if (idx>=0)
        {
            printf("Right= %s ", m_terminals[i].Value());
        }
        else
        {
            printf("Right= %ld ", -idx);
        }
    }
}

/*****************************************************************************
* CQuestSet::Debug *
*------------------*
*   Description:
*       Debug-only dump, compiled when _DEBUG_ is defined.
*
*       NOTE(review): the body is cut off in this copy of the file; the
*       remnant is kept exactly as found.
*
******************************************************************* PACOG ***/
void CQuestSet::Debug ()
{
    for (int i=0; i