//$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ // // Copyright (c) 2001 Microsoft Corporation. All rights reserved. // // Module: // volcano/dll/lattice-lm.cpp // // Description: // Interface between the recognizer and IFELang3. Unlike the rest of // the recognizer this is written in C++ because IFELang3 uses a COM // interface. // // Author: // hrowley // //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ // Conditional define for use of IFELang3, so it can be turned off for WinCE #ifdef USE_IFELANG3 #include #include // Make sure the GUIDs defined in imlang.h actually get instantiated #define INITGUID // Uncomment to dump out the lattice fed to the language model //#define DUMP_LM_LATTICE // Uncomment to append all the lattices into one file rather than overwriting. //#define DUMP_LM_LATTICE_APPEND #define LM_LATTICE_FILENAME L"c:/temp/cht-lm-log.txt" // Uncomment this to get the dumped lattice in the form expected by the CHS test program //#define LM_LATTICE_UNICODE //#define LM_LATTICE_CHS #define LM_LATTICE_CHT #if defined(HWX_TUNE) || defined(DUMP_LM_LATTICE) #include #endif #include "volcanop.h" // Japanese and IFELang3 generic stuff #include "imlang.h" // Simplified and traditional Chinese #include "fel3user.h" #ifdef USE_IFELANG3_BIGRAMS #include "lattice-bigram.h" #endif // A global pointer to the IIMLanguage interface object static IIMLanguage *g_pIFELang3=NULL; // Each language has its own set of GUIDs used as command codes, so that the IIMLanguage interface can direct requests to // the appropriate language model. static int SetLanguageModelCommands(IMLANGUAGE_LM aLMITEM[3], IMLANGUAGE_LM_PARAM *sTuneParam, SImeLMParam *sHWTip1Param) { int nLMITEM = 0; // Japanese if (wcsicmp(g_szRecognizerLanguage,L"JPN")==0) { /* aLMITEM[nLMITEM].guid = CLSID_MakeLatticeForChar; aLMITEM[nLMITEM].pParam = NULL; nLMITEM++; aLMITEM[nLMITEM].guid = CLSID_AttachNGram; aLMITEM[nLMITEM].pParam = NULL; nLMITEM++; aLMITEM[nLMITEM].guid = CLSID_SearchBestPath; aLMITEM[nLMITEM].pParam = NULL; nLMITEM++; */ aLMITEM[nLMITEM].guid = CLSID_JPN_IMELM_BBO_OS; aLMITEM[nLMITEM].pParam = NULL; nLMITEM++; } // For now, disable the use of the Chinese and Korean language models, since // at least the Chinese language models currently are hurting accuracy. // Simplified Chinese if (wcsicmp(g_szRecognizerLanguage,L"CHS")==0) { sHWTip1Param->dwLicenseId = CHINESE_IMELM_LICENSEID; sHWTip1Param->flWeight = (float)(1.0); sTuneParam->guid = GUID_CHINESE_IMELM_PARAM; sTuneParam->dwSize = sizeof(SImeLMParam); sTuneParam->pbData = (BYTE*)sHWTip1Param; aLMITEM[nLMITEM].guid = CLSID_CHS_IMELM_BBO_OS; aLMITEM[nLMITEM].pParam = sTuneParam; nLMITEM++; } #if 0 // Traditional Chinese if (wcsicmp(g_szRecognizerLanguage,L"CHT")==0) { sHWTip1Param->dwLicenseId = CHINESE_IMELM_LICENSEID; sHWTip1Param->flWeight = (float)(1.0); sTuneParam->guid = GUID_CHINESE_IMELM_PARAM; sTuneParam->dwSize = sizeof(SImeLMParam); sTuneParam->pbData = (BYTE*)sHWTip1Param; aLMITEM[nLMITEM].guid = CLSID_CHT_IMELM_BBO_OS; aLMITEM[nLMITEM].pParam = sTuneParam; nLMITEM++; } // Korean if (wcsicmp(g_szRecognizerLanguage,L"KOR")==0) { aLMITEM[nLMITEM].guid = CLSID_KOR_IMELM_BBO_OS; aLMITEM[nLMITEM].pParam = NULL; nLMITEM++; } #endif // Return the number of command codes, or 0 if there was no match for the current language return nLMITEM; } // This function tests the IIMLanguage interface to see if the appropriate language model is // installed. BOOL TryDummyLattice() { IMLANGUAGE_LM_PARAM sTuneParam; SImeLMParam sHWTip1Param; IMLANGUAGE_LM aLMITEM[3]; int nLMITEM = 0; HRESULT rc; // Set return code to look like an error rc = E_FAIL; // Set up the language specific commands nLMITEM = SetLanguageModelCommands(aLMITEM, &sTuneParam, &sHWTip1Param); if (nLMITEM>0) { // If there are any commands for that language, then apply them to the lattice. rc = g_pIFELang3->GetLatticeMorphResult(nLMITEM, aLMITEM, 0, NULL, 0, NULL); } return (SUCCEEDED(rc)); } // Load up IIMLanguage and check if it has a language model for this recognizer's language. // If successful, returns TRUE, otherwise returns FALSE. BOOL LatticeConfigIFELang3() { if (g_pIFELang3 != NULL) { return FALSE; } HRESULT res = CoCreateInstance(CLSID_IMLanguage_OS, NULL, CLSCTX_INPROC_SERVER, IID_IMLanguage, (void**)&g_pIFELang3); // TPDBG_DMSG2("%08X: CoCreate(CLSID_IMLanguage_OS) -> %08X\n", GetCurrentThreadId(), res); if (SUCCEEDED(res)) { if (TryDummyLattice()) { // TPDBG_DMSG1("%08X: TryDummyLattice() -> TRUE\n", GetCurrentThreadId()); return TRUE; } } // Clean up if things didn't work. // TPDBG_DMSG1("%08X: TryDummyLattice() -> FALSE\n", GetCurrentThreadId()); LatticeUnconfigIFELang3(); return FALSE; } // Check whether IIMLanguage is loaded. BOOL LatticeIFELang3Available() { return (g_pIFELang3!=NULL); } // Unload IIMLanguage if it is loaded. BOOL LatticeUnconfigIFELang3() { if (g_pIFELang3!=NULL) { // TPDBG_DMSG1("%08X: g_pIFELang3->Release()\n", GetCurrentThreadId()); g_pIFELang3->Release(); g_pIFELang3=NULL; } return TRUE; } // qsort: compare the start time of element int __cdecl CompareElemTime(const void *ps1, const void *ps2) { return ((IMLANGUAGE_ELEMENT*)ps1)->dwFrameStart - ((IMLANGUAGE_ELEMENT*)ps2)->dwFrameStart; } BOOL ProbIsBad(LATTICE *lat, float flProb) { if (lat->fUseGuide) { return (flProb < g_vtuneInfo.pTune->flStringHwxThreshold); } else { return (flProb < g_vtuneInfo.pTune->flFreeHwxThreshold); } } // Figure out which strokes are valid end of character in paths that span the // whole lattice. static BOOL CheckIfValid(LATTICE *lat, int iStroke, BYTE *pValidEnd) { LATTICE_ALT_LIST *pAlts; int ii; BOOL fValidPath; // Have we already checked this position? if (pValidEnd[iStroke] == LCF_INVALID) { return FALSE; } else if (pValidEnd[iStroke] == LCF_VALID) { return TRUE; } // Assume no valid path, until proved otherwise. fValidPath = FALSE; // Process each element ending at this position in lattice. pAlts = lat->pAltList + iStroke; for (ii = 0; ii < pAlts->nUsed; ++ii) { LATTICE_ELEMENT *pElem; int prevEnd; pElem = pAlts->alts + ii; prevEnd = iStroke - pElem->nStrokes; ASSERT(prevEnd >= -1); if (ProbIsBad(lat, pElem->logProb)) { // Pretend that the really bad scores don't exist! // This test MUST match code building up lattice to use! } else if (prevEnd < 0) { // Reached start. fValidPath = TRUE; } else if (CheckIfValid(lat, prevEnd, pValidEnd)) { // Have path to start. fValidPath = TRUE; } } // Did one (or more) paths reach the start? if (fValidPath) { pValidEnd[iStroke] = LCF_VALID; return TRUE; } else { pValidEnd[iStroke] = LCF_INVALID; return FALSE; } } // Apply language model to produce a better current path void ApplyLanguageModel(LATTICE *lat, wchar_t *wszCorrectAnswer) { DWORD dwNeutralSize; IMLANGUAGE_ELEMENT *pLattice; IMLANGUAGE_INFO *pDataList; #ifdef USE_IFELANG3_BIGRAMS DWORD dwBigramSize; LATTICE_BIGRAM_INFO_LIST *pBigramInfo; #endif int cElem, cData; int cLattice; DWORD cElemOut; IMLANGUAGE_ELEMENT *pElemOut; IMLANGUAGE_LM_PARAM sTuneParam; SImeLMParam sHWTip1Param; IMLANGUAGE_LM aLMITEM[3]; IMLANGUAGE_INFO_NEUTRAL1 *pNeutral; BYTE *pValidEnd; int size; int nLMITEM; DWORD ii, jj; HRESULT rc; #ifdef DUMP_LM_LATTICE FILE *f; #endif int iStroke, iAlt; int cPreContext = 0, cPostContext = 0; // DebugBreak(); // return; // fprintf(stderr,"ApplyLanguageModel() fUseIFELang3=%d\n", lat->fUseIFELang3); ASSERT(lat!=NULL); if (!LatticeIFELang3Available()) return; if (!lat->fUseLM || !lat->fUseIFELang3) return; if (lat->nStrokes==0) return; // Check if there are any alphabetic characters in the best path. If so, skip IFELang3 for (iStroke = 0; iStroke < lat->nStrokes; iStroke++) { for (iAlt = 0; iAlt < lat->pAltList[iStroke].nUsed; iAlt++) { if (lat->pAltList[iStroke].alts[iAlt].fCurrentPath) { wchar_t dch = lat->pAltList[iStroke].alts[iAlt].wChar; if (dch != SYM_UNKNOWN) { wchar_t wch = LocRunDense2Unicode(&g_locRunInfo, dch); if ((wch >= L'a' && wch <= L'z') || (wch >= L'A' && wch <= L'Z')) { return; } } } } } // Check if we have a character of context that needs to be included in the lattice if (lat->wszBefore != NULL) cPreContext = wcslen(lat->wszBefore); if (lat->wszAfter != NULL) cPostContext = wcslen(lat->wszAfter); // How big does each lattice element need to be? The structure includes space for // one character, and it needs to be followed by 2 NUL characters. dwNeutralSize = sizeof(IMLANGUAGE_INFO_NEUTRAL1) + sizeof(wchar_t)*2; // Figure out valid character transition points by stroke. We don't wan't to include // pieces of lattice that don't actually connect up for the full run. This uses // a recursive algorithm to find valid paths. Since it marks the paths as it // finds them, it can quickly test and not redo work. size = sizeof(BYTE) * lat->nStrokes; pValidEnd = (BYTE *)ExternAlloc(size); // If we failed to allocate memory, just return if (pValidEnd==NULL) return; memset(pValidEnd, 0, size); if (!CheckIfValid(lat, lat->nStrokes - 1, pValidEnd)) { // No valid paths found?!?! // This can happen... so clean up and return. // We'll just get the best path selected by the HWX engine. goto allocated_valid; } // First allocate space to build the lattice up in. cElem = lat->nStrokes * MaxAltsPerStroke + (cPreContext + cPostContext); cData = cElem; #ifdef USE_IFELANG3_BIGRAMS cData += lat->nStrokes*MaxAltsPerStroke; #endif pDataList = (IMLANGUAGE_INFO*)ExternAlloc(sizeof(IMLANGUAGE_INFO)*cData); if (pDataList==NULL) goto allocated_valid; pLattice = (IMLANGUAGE_ELEMENT*)ExternAlloc(sizeof(IMLANGUAGE_ELEMENT)*cElem); if (pLattice==NULL) goto allocated_data; #ifdef DUMP_LM_LATTICE #ifdef DUMP_LM_LATTICE_APPEND f=_wfopen(LM_LATTICE_FILENAME,L"ab"); #else f=_wfopen(LM_LATTICE_FILENAME,L"wb"); #endif fwprintf(f, L"\r\n\r\n"); #endif #ifdef HWX_TUNE if (g_pTuneFile != NULL) { for (int i = 0; i < (int) wcslen(wszCorrectAnswer); i++) { if (i > 0) { fprintf(g_pTuneFile, " "); } fprintf(g_pTuneFile, "%04X", wszCorrectAnswer[i]); } fprintf(g_pTuneFile, "\n"); } #endif cLattice = 0; cData = 0; if (lat->wszBefore != NULL) { int iSrc; int iFrame = 1; for (iSrc = wcslen(lat->wszBefore) - 1; iSrc >= 0; iSrc--) { // If we have a context character, then create a lattice element for it. pNeutral = (IMLANGUAGE_INFO_NEUTRAL1*)ExternAlloc(dwNeutralSize); if (pNeutral == NULL) goto allocated_elements; pNeutral->dwUnigram = 0; pNeutral->fHypothesis = FALSE; pNeutral->wsz[0] = LocRunDense2Unicode(&g_locRunInfo, lat->wszBefore[iSrc]); pNeutral->wsz[1] = 0; pNeutral->wsz[2] = 0; pDataList[cData].guid = GUID_IMLANGUAGE_INFO_NEUTRAL1; pDataList[cData].dwSize = dwNeutralSize; pDataList[cData].pbData = (BYTE*)pNeutral; pLattice[cLattice].dwFrameStart = iFrame++; pLattice[cLattice].dwFrameLen = 1; pLattice[cLattice].dwTotalInfo = 1; pLattice[cLattice].pInfo = pDataList + cLattice; #ifdef HWX_TUNE if (g_pTuneFile != NULL) { fprintf(g_pTuneFile, "%04X %d %d %f\n", pNeutral->wsz[0], pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, 0.0); } #endif #ifdef DUMP_LM_LATTICE if (f!=NULL) { char sz[256]; ZeroMemory( sz, 256 ); #if defined(LM_LATTICE_CHS) WideCharToMultiByte( 936, 0, pNeutral->wsz, -1, sz, 256, 0, 0 ); #elif defined(LM_LATTICE_CHT) WideCharToMultiByte( 950, 0, pNeutral->wsz, -1, sz, 256, 0, 0 ); #elif defined(LM_LATTICE_JPN) WideCharToMultiByte( 932, 0, pNeutral->wsz, -1, sz, 256, 0, 0 ); #endif #if defined(LM_LATTICE_CHS) || defined(LM_LATTICE_CHT) fwprintf(f, L"%s, %d, %d, %d\r\n", pNeutral->wsz, pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram); #elif defined(LM_LATTICE_UNICODE) fwprintf(f, L"%3d %2d %10d %s U+%04X\r\n", pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram, pNeutral->wsz, pNeutral->wsz[0] ); #endif } #endif cData++; cLattice++; } } // Now process each position. for (iStroke=0; iStrokenStrokes; iStroke++) { // Do we actually want any paths that end at this location? if (pValidEnd[iStroke] != LCF_VALID) { continue; } // int max=5; // if (iStroke==0) max=2; for (iAlt=0; iAltpAltList[iStroke].nUsed; iAlt++) if (1 /*iAltpAltList[iStroke].alts[iAlt].fCurrentPath*/) { wchar_t unicode; float flScore; // Prune really bad scores. This MUST match test in path // checking code! if (ProbIsBad(lat, lat->pAltList[iStroke].alts[iAlt].logProb)) { continue; } // Fill in lattice element pNeutral = (IMLANGUAGE_INFO_NEUTRAL1*)ExternAlloc(dwNeutralSize); if (pNeutral==NULL) goto allocated_elements; flScore = -lat->pAltList[iStroke].alts[iAlt].logProb; if (lat->fUseGuide) { flScore *= g_vtuneInfo.pTune->flStringHwxWeight; } else { flScore *= g_vtuneInfo.pTune->flFreeHwxWeight * lat->pAltList[iStroke].alts[iAlt].nStrokes; } if (flScore < 0) flScore = (float)0; if (flScore > INT_MAX) flScore = (float)INT_MAX; pNeutral->dwUnigram = (DWORD)(flScore); if (lat->pAltList[iStroke].alts[iAlt].wChar==SYM_UNKNOWN) { unicode=L' '; } else { unicode = LocRunDense2Unicode( &g_locRunInfo, lat->pAltList[iStroke].alts[iAlt].wChar ); } pNeutral->fHypothesis = FALSE; pNeutral->wsz[0] = unicode; pNeutral->wsz[1] = 0; pNeutral->wsz[2] = 0; // Check for EUDC codes, which we shouldn't be generating (for debugging) // if (pNeutral->wsz[0]>=0xE000 && pNeutral->wsz[0]<0xF900) { // fprintf(stderr,"EUDC code in lattice (stroke %d alt %d): U+%04X\n",iStroke,iAlt,pNeutral->wsz[0]); // } pLattice[cLattice].dwFrameStart = (iStroke - lat->pAltList[iStroke].alts[iAlt].nStrokes) + 2 + cPreContext; pLattice[cLattice].dwFrameLen=lat->pAltList[iStroke].alts[iAlt].nStrokes; pLattice[cLattice].dwTotalInfo = 1; pLattice[cLattice].pInfo = pDataList + cData; pDataList[cData].guid = GUID_IMLANGUAGE_INFO_NEUTRAL1; pDataList[cData].dwSize = dwNeutralSize; pDataList[cData].pbData = (BYTE*)pNeutral; cData++; #ifdef HWX_TUNE if (g_pTuneFile != NULL) { fprintf(g_pTuneFile, "%04X %d %d %f\n", pNeutral->wsz[0], pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, lat->pAltList[iStroke].alts[iAlt].logProb); } #endif #ifdef USE_IFELANG3_BIGRAMS lat->pAltList[iStroke].alts[iAlt].indexIFELang3=cLattice; if (lat->pAltList[iStroke].alts[iAlt].nBigrams>0) { dwBigramSize=sizeof(LATTICE_BIGRAM_INFO_LIST)+ sizeof(LATTICE_BIGRAM_INFO)*(lat->pAltList[iStroke].alts[iAlt].nBigrams-1); pBigramInfo = (LATTICE_BIGRAM_INFO_LIST*)ExternAlloc(dwBigramSize); if (pBigramInfo==NULL) goto allocated_elements; int iPrevStart=iStroke-lat->pAltList[iStroke].alts[iAlt].nStrokes; for (int i=0; ipAltList[iStroke].alts[iAlt].nBigrams; i++) { pBigramInfo->bigrams[i].dwBigram=(DWORD)(lat->pAltList[iStroke].alts[iAlt].bigramLogProbs[i]); pBigramInfo->bigrams[i].dwPrevElement= lat->pAltList[iPrevStart].alts[lat->pAltList[iStroke].alts[iAlt].bigramAlts[i]].indexIFELang3; } pDataList[cData].guid = GUID_LATTICE_BIGRAM_INFO_LIST; pDataList[cData].dwSize = dwBigramSize; pDataList[cData].pbData = (BYTE*)pBigramInfo; cData++; pLattice[cLattice].dwTotalInfo++; } #endif #ifdef DUMP_LM_LATTICE if (f!=NULL) { char sz[256]; ZeroMemory( sz, 256 ); #if defined(LM_LATTICE_CHS) WideCharToMultiByte( 936, 0, pNeutral->wsz, -1, sz, 256, 0, 0 ); #elif defined(LM_LATTICE_CHT) WideCharToMultiByte( 950, 0, pNeutral->wsz, -1, sz, 256, 0, 0 ); #elif defined(LM_LATTICE_JPN) WideCharToMultiByte( 932, 0, pNeutral->wsz, -1, sz, 256, 0, 0 ); #endif // fprintf(f,"%3d %2d %10d %s\n",pLattice[cLattice].dwFrameStart,pLattice[cLattice].dwFrameLen,pNeutral->dwUnigram,sz); // fprintf(f,"%3d %2d %10d %s U+%04X\n",pLattice[cLattice].dwFrameStart,pLattice[cLattice].dwFrameLen,pNeutral->dwUnigram,sz,pNeutral->wsz[0]); #if defined(LM_LATTICE_CHS) || defined(LM_LATTICE_CHT) fwprintf(f, L"%s, %d, %d, %d\r\n", pNeutral->wsz, pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram); #elif defined(LM_LATTICE_UNICODE) fwprintf(f, L"%3d %2d %10d %s U+%04X\r\n", pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram, pNeutral->wsz, pNeutral->wsz[0] ); #endif #ifdef USE_IFELANG3_BIGRAMS fprintf(f,"%d",lat->pAltList[iStroke].alts[iAlt].nBigrams); int iPrevStart=iStroke-lat->pAltList[iStroke].alts[iAlt].nStrokes; for (int i=0; ipAltList[iStroke].alts[iAlt].nBigrams; i++) { fprintf(f," %d %d\n", lat->pAltList[iPrevStart].alts[lat->pAltList[iStroke].alts[iAlt].bigramAlts[i]].indexIFELang3, lat->pAltList[iStroke].alts[iAlt].bigramLogProbs); } fprintf(f,"\n"); #endif } #endif cLattice++; } } if (lat->wszAfter != NULL) { int iFrame = pLattice[cLattice].dwFrameStart = (lat->nStrokes - 1) + 2 + cPreContext; for (int iSrc = 0; iSrc < (int) wcslen(lat->wszAfter); iSrc++) { // If we have a context character, then create a lattice element for it. pNeutral = (IMLANGUAGE_INFO_NEUTRAL1*)ExternAlloc(dwNeutralSize); if (pNeutral == NULL) goto allocated_elements; pNeutral->dwUnigram = 0; pNeutral->fHypothesis = FALSE; pNeutral->wsz[0] = LocRunDense2Unicode(&g_locRunInfo, lat->wszAfter[iSrc]); pNeutral->wsz[1] = 0; pNeutral->wsz[2] = 0; pDataList[cData].guid = GUID_IMLANGUAGE_INFO_NEUTRAL1; pDataList[cData].dwSize = dwNeutralSize; pDataList[cData].pbData = (BYTE*)pNeutral; pLattice[cLattice].dwFrameStart = iFrame++; pLattice[cLattice].dwFrameLen = 1; pLattice[cLattice].dwTotalInfo = 1; pLattice[cLattice].pInfo = pDataList + cLattice; #ifdef HWX_TUNE if (g_pTuneFile != NULL) { fprintf(g_pTuneFile, "%04X %d %d %f\n", pNeutral->wsz[0], pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, 0.0); } #endif #ifdef DUMP_LM_LATTICE if (f!=NULL) { char sz[256]; ZeroMemory( sz, 256 ); #if defined(LM_LATTICE_CHS) WideCharToMultiByte( 936, 0, pNeutral->wsz, -1, sz, 256, 0, 0 ); #elif defined(LM_LATTICE_CHT) WideCharToMultiByte( 950, 0, pNeutral->wsz, -1, sz, 256, 0, 0 ); #elif defined(LM_LATTICE_JPN) WideCharToMultiByte( 932, 0, pNeutral->wsz, -1, sz, 256, 0, 0 ); #endif #if defined(LM_LATTICE_CHS) || defined(LM_LATTICE_CHT) fwprintf(f, L"%s, %d, %d, %d\r\n", pNeutral->wsz, pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram); #elif defined(LM_LATTICE_UNICODE) fwprintf(f, L"%3d %2d %10d %s U+%04X\r\n", pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram, pNeutral->wsz, pNeutral->wsz[0] ); #endif } #endif cData++; cLattice++; } } #ifdef DUMP_LM_LATTICE if (f!=NULL) { fwprintf(f, L"\r\n"); fclose(f); } #endif #ifdef HWX_TUNE if (g_pTuneFile != NULL) { fprintf(g_pTuneFile, "\n"); fflush(g_pTuneFile); } #endif // define "batch" for IIMLanguage nLMITEM = SetLanguageModelCommands(aLMITEM, &sTuneParam, &sHWTip1Param); // Apply IIMLanguage rc = g_pIFELang3->GetLatticeMorphResult( nLMITEM, aLMITEM, cLattice, pLattice, &cElemOut, &pElemOut ); // TPDBG_DMSG2("%08X: GetLatticeMorphResult -> %08X\n", GetCurrentThreadId(), rc); if (SUCCEEDED(rc)) { // Let's validate what came back from IFELang3. The main array of // results must be writable, so we can sort it later. if (cElemOut <= 0 || IsBadWritePtr(pElemOut, cElemOut * sizeof(*pElemOut))) { ASSERT(("IFELang3 pElemOut not readable and writable", FALSE)); goto free_lattice; } // Then validate each of the pointers in the above array for (ii = 0; ii < cElemOut; ii++) { IMLANGUAGE_ELEMENT *pElem = pElemOut + ii; // If the element doesn't have any info items in it, then skip it. if (pElem->dwTotalInfo <= 0) continue; // Otherwise validate the array of info items if (IsBadReadPtr(pElem->pInfo, pElem->dwTotalInfo * sizeof(*(pElem->pInfo)))) { ASSERT(("IFELang3 pElemOut->pInfo not readable", FALSE)); goto free_lattice; } for (jj = 0; jj < pElem->dwTotalInfo; jj++) { // Make sure the info is readable and the size it is supposed to be if (IsBadReadPtr(pElem->pInfo[jj].pbData, pElem->pInfo[jj].dwSize)) { ASSERT(("IFELang3 pElemOut->pInfo->pbData not readable", FALSE)); goto free_lattice; } // We only care about info of this type if (pElem->pInfo[jj].guid == GUID_IMLANGUAGE_INFO_NEUTRAL1) { // Get the pointer to the actual info. pNeutral = (IMLANGUAGE_INFO_NEUTRAL1 *) pElem->pInfo[jj].pbData; // cLattice + 1 is a very loose upper bound on the number of characters // which should be returned; then use the string length to check that // the specified size is correct. if (IsBadStringPtrW(pNeutral->wsz, cLattice + 1) || sizeof(IMLANGUAGE_INFO_NEUTRAL1) + sizeof(WCHAR) * (wcslen(pNeutral->wsz) + 1) > pElem->pInfo[jj].dwSize) { ASSERT(("IFELang3 pElemOut->pInfo->pbData->wsz not readable", FALSE)); goto free_lattice; } } } } // If the language model worked, // trace the path that came back from the model. int cChars = 0; wchar_t *wszBestPath = NULL; // Sort the output lattice elements into time order qsort(pElemOut, cElemOut, sizeof(IMLANGUAGE_ELEMENT), CompareElemTime); // Count the number of characters in the path for (ii=0; iidwTotalInfo; jj++ ) { pNeutral = (IMLANGUAGE_INFO_NEUTRAL1 *) pElem->pInfo[jj].pbData; if (pElem->pInfo[jj].guid==GUID_IMLANGUAGE_INFO_NEUTRAL1) { /* FILE *f=fopen("/log.txt","a+"); fprintf(f,"element %d has %d chars hypothesis %d\n",cChars,wcslen(pNeutral->wsz),pNeutral->fHypothesis); fclose(f); */ cChars += wcslen(pNeutral->wsz); } } } // Allocate space for the path wszBestPath = (wchar_t*)ExternAlloc(sizeof(wchar_t)*(cChars+1)); // If we failed, then just don't update the path if (wszBestPath != NULL) { // Copy the path out of the lattice wszBestPath[0] = 0; for (ii=0; iidwTotalInfo; jj++ ) { pNeutral = (IMLANGUAGE_INFO_NEUTRAL1 *) pElem->pInfo[jj].pbData; if (pElem->pInfo[jj].guid==GUID_IMLANGUAGE_INFO_NEUTRAL1) wcscat(wszBestPath, pNeutral->wsz); } } // Wipe out the post-context character(s) wszBestPath[wcslen(wszBestPath) - cPostContext] = 0; // Skip the pre-context character(s) wszBestPath += cPreContext; // Trace the string through the original lattice, using the // same code that is used for the separator. int nSubs=SearchForTargetResultInternal(lat, wszBestPath); // fprintf(stderr, "Searching for target, nSubs=%d\n", nSubs); // Put the context character(s) back wszBestPath -= cPreContext; if (nSubs!=0) { // FILE *f = fopen("c:/log.txt", "a"); // fprintf(f, "Lost path with %d/%d/%d chars\n", // cPreContext, wcslen(wszBestPath) - cPreContext, cPostContext); // fclose(f); } else { // FILE *f = fopen("c:/log.txt", "a"); // fprintf(f, "Found path with %d/%d/%d chars\n", // cPreContext, wcslen(wszBestPath) - cPreContext, cPostContext); // fclose(f); } ExternFree(wszBestPath); } free_lattice: // And free the memory allocated for us by IIMLanguage. if (cElemOut > 0 && !IsBadReadPtr(pElemOut, cElemOut * sizeof(*pElemOut))) { for (ii=0; ii < cElemOut; ii++) { IMLANGUAGE_ELEMENT *pElem = pElemOut + ii; if (!IsBadReadPtr(pElem->pInfo, sizeof(*(pElem->pInfo)) * pElem->dwTotalInfo)) { // Free each data item in the element. for (jj = 0; jj < pElem->dwTotalInfo; jj++) { if (!IsBadReadPtr(pElem->pInfo[jj].pbData, pElem->pInfo[jj].dwSize)) { CoTaskMemFree(pElem->pInfo[jj].pbData); } } // Free the element array itself. CoTaskMemFree(pElem->pInfo); } } CoTaskMemFree(pElemOut); } } // Free all the memory we allocated. allocated_elements: for (ii = 0; ii < (DWORD)cData; ++ii) { if (pDataList[ii].pbData!=NULL) ExternFree(pDataList[ii].pbData); } ExternFree(pLattice); allocated_data: ExternFree(pDataList); allocated_valid: ExternFree(pValidEnd); } #endif // USE_IFELANG3