//$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
//
// Copyright (c) 2001 Microsoft Corporation. All rights reserved.
//
// Module:
//      volcano/dll/CharRec.c
//
// Description:
//      Main sequencing code to recognize one character ignoring
//      size and position.
//
// Author:
//      hrowley
//
//$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

#include "volcanop.h"
#include "frame.h"
#include "glyph.h"
#if defined(USE_HOUND) || defined(USE_ZILLAHOUND)
#   include "math16.h"
#   include "hound.h"
#   include "zillatool.h"
#endif
#ifndef USE_OLD_DATABASES
#   include "hawk.h"
#endif

#ifdef USE_RESOURCES
#   include "res.h"
#endif

//#define OPTIMAL_OTTER_ZILLA

// Uncomment this to enable use of the old tsunami-style computation
// (using OtterMatch & ZillaMatch instead of OtterMatch2 & ZillaMatch2,
// and index the prob table by codepoint instead of prototype number).
//#define USE_OLD_DATABASES

/////////////////////////////////////////////////////////////////////////
// Hack code for probabilities, this will go away once Hawk works.

#include "probHack.h"

PROB_HEADER *g_pProbHeader = 0;

#define EntryPtr(i) \
    (PROB_ENTRY *)(((BYTE *)g_pProbHeader) + g_pProbHeader->aEntryOffset[i])
#define AltPtr(i) \
    (PROB_ALT *)(((BYTE *)g_pProbHeader) + g_pProbHeader->aAltOffset[i])
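// Note: PROB_HEADER, PROB_ENTRY and PROB_ALT are defined in probHack.h, which
// is not shown here. Judging from how GetProbsTsunami below walks these tables,
// the data appears to be bucketed by stroke count: EntryPtr(i)..EntryPtr(i+1)
// brackets the PROB_ENTRY records for characters written with i+1 strokes, and
// AltPtr(i)..AltPtr(i+1) holds their alternates, with each entry's cAlts
// alternates stored consecutively in the same order as the entries.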

void ProbLoadPointer(void * pData)
{
    BYTE *pScan = (BYTE *)pData;

    g_pProbHeader = (PROB_HEADER *)pScan;
    pScan += sizeof(PROB_HEADER);
}

#ifdef USE_RESOURCES

BOOL ProbLoadRes(
    HINSTANCE hInst,
    int resNumber,
    int resType
) {
    BYTE *pByte;

    // Load the prob database
    pByte = DoLoadResource(NULL, hInst, resNumber, resType);
    if (!pByte) {
        return FALSE;
    }
    ProbLoadPointer(pByte);

    return TRUE;
}

#else

BOOL ProbLoadFile(wchar_t *pPath, LOAD_INFO *pInfo)
{
    HANDLE hFile, hMap;
    BYTE *pByte;
    wchar_t aFile[128];

    pInfo->hFile = INVALID_HANDLE_VALUE;
    pInfo->hMap = INVALID_HANDLE_VALUE;
    pInfo->pbMapping = INVALID_HANDLE_VALUE;

    // Generate path to file.
    FormatPath(aFile, pPath, (wchar_t *)0, (wchar_t *)0, (wchar_t *)0, L"prob.bin");

    // Map the file
    hFile = CreateMappingCall(
        aFile,
        GENERIC_READ,
        FILE_SHARE_READ,
        NULL,
        OPEN_EXISTING,
        FILE_ATTRIBUTE_NORMAL,
        NULL
    );

    if (hFile == INVALID_HANDLE_VALUE)
    {
        ASSERT(("Error in CreateMappingCall - prob", FALSE));
        goto error1;
    }

    // Create a mapping handle
    hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
    if (hMap == NULL)
    {
        ASSERT(("Error in CreateFileMapping - prob", FALSE));
        goto error2;
    }

    // Map the entire file starting at the first byte
    pByte = (LPBYTE) MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0);
    if (pByte == NULL) {
        ASSERT(("Error in MapViewOfFile - prob", FALSE));
        goto error3;
    }

    // Extract info from mapped data.
    ProbLoadPointer((void *)pByte);

    // Save away the pointers so we can close up cleanly later
    pInfo->hFile = hFile;
    pInfo->hMap = hMap;
    pInfo->pbMapping = pByte;

    return TRUE;

    // Error handling
error3:
    CloseHandle(hMap);
    hMap = INVALID_HANDLE_VALUE;

error2:
    CloseHandle(hFile);
    hFile = INVALID_HANDLE_VALUE;

error1:

    return FALSE;
}

BOOL ProbUnLoadFile(LOAD_INFO *pInfo)
{
    if (pInfo->hFile == INVALID_HANDLE_VALUE ||
        pInfo->hMap == INVALID_HANDLE_VALUE ||
        pInfo->pbMapping == INVALID_HANDLE_VALUE) {
        return FALSE;
    }

    UnmapViewOfFile(pInfo->pbMapping);
    CloseHandle(pInfo->hMap);
    CloseHandle(pInfo->hFile);

    pInfo->pbMapping = INVALID_HANDLE_VALUE;
    pInfo->hMap = INVALID_HANDLE_VALUE;
    pInfo->hFile = INVALID_HANDLE_VALUE;

    return TRUE;
}

#endif

// Given an alt list with dense and possibly folded codes in it, run through it
// and expand the folded lists. The unfolded alt list is returned in place.
// This function assumes that the list begins with better alternates, as those
// later in the list will get dropped if we run out of space.
void UnfoldCodes(ALT_LIST *pAltList, CHARSET *cs)
{
    int i, cOut=0;
    ALT_LIST newAltList;    // This will be where the new alt list is constructed.

    // For each alternate in the input list and while we have space in the output list
    for (i=0; i<(int)pAltList->cAlt && (int)cOut<MAX_ALT_LIST; i++) {

        // Check if the alternate is a folded code
        if (LocRunIsFoldedCode(&g_locRunInfo,pAltList->awchList[i])) {
            int kndex;
            // If it is a folded code, look up the folding set
            wchar_t *pFoldingSet = LocRunFolded2FoldingSet(&g_locRunInfo, pAltList->awchList[i]);

            // Run through the folding set, adding non-NUL items to the output list
            // (until the output list is full)
            for (kndex = 0;
                 kndex < LOCRUN_FOLD_MAX_ALTERNATES && pFoldingSet[kndex] != 0 && (int)cOut<MAX_ALT_LIST;
                 kndex++) {
                if (IsAllowedChar(&g_locRunInfo, cs, pFoldingSet[kndex]))
                {
                    newAltList.awchList[cOut]=pFoldingSet[kndex];
                    newAltList.aeScore[cOut]=pAltList->aeScore[i];
                    cOut++;
#ifdef DISABLE_UNFOLDING
                    // If unfolding is disabled, then stop after producing one unfolded code.
                    // This way we don't push results later in the alt list out of the alt
                    // list, while still allowing the recognizer to return unicodes for each
                    // alternate.
                    break;
#endif
                }
            }
        } else {
            // Dense codes that are not folded get added directly
            newAltList.awchList[cOut]=pAltList->awchList[i];
            newAltList.aeScore[cOut]=pAltList->aeScore[i];
            cOut++;
        }
    }
    // Store the length of the output list
    newAltList.cAlt=cOut;

    // Copy the output list over the input.
    *pAltList=newAltList;
}
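// Note: purely as an illustration of the loop above, if a folded dense code
// stood for a hypothetical folding set { 'O', 'o', '0' }, that single alternate
// would be replaced by up to three unfolded alternates, each inheriting the
// folded alternate's score, subject to the IsAllowedChar filter and the
// MAX_ALT_LIST cap.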

#ifdef USE_OLD_DATABASES

// Used for WinCE
// Given a feature space (cFrame), an alt list, and a requested number of alts, this
// function returns a new alt list with probabilities for each alternate. It uses a
// fixed prob distribution.
int GetProbsTsunamiFixedTable(
    int cFrame,
    ALT_LIST *pAltList,
    int maxAlts,
    RECOG_ALT *pRAlts,
    CHARSET *pCS
) {
    int rank = 0;
    FLOAT rankScore = pAltList->aeScore[0];
    int cAlt;
    int iDest = 0;

    for (cAlt = 0; cAlt < (int) pAltList->cAlt && iDest < maxAlts; ++cAlt)
    {
        if (pAltList->aeScore[cAlt] != rankScore)
        {
            rank ++;
            rankScore = pAltList->aeScore[cAlt];
        }

        if (IsAllowedChar(&g_locRunInfo, pCS, pAltList->awchList[cAlt]))
        {
            int count;
            switch (rank) {
                case 0:
                    count = 141125;
                    break;
                case 1:
                    count = 6090;
                    break;
                case 2:
                    count = 957;
                    break;
                case 3:
                    count = 362;
                    break;
                case 4:
                    count = 161;
                    break;
                case 5:
                    count = 82;
                    break;
                case 6:
                    count = 66;
                    break;
                case 7:
                    count = 49;
                    break;
                case 8:
                    count = 36;
                    break;
                case 9:
                    count = 34;
                    break;
                default:
                    count = 10;
                    break;
            }
            pRAlts[iDest].wch = pAltList->awchList[cAlt];
            pRAlts[iDest].prob = 65535*(float)count/(float)149903;
            iDest++;
        }
    }
    return iDest;
}
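// Note: a worked instance of the fixed table above. With the 149903 denominator
// used in the prob expression, a rank-0 alternate gets 65535 * 141125 / 149903,
// roughly 61700 out of 65535 (about 94% of full scale), while a rank-1
// alternate gets 65535 * 6090 / 149903, roughly 2660 (about 4%). The counts
// look like empirical rank frequencies, though that is an inference rather
// than something stated in this file.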

// Desktop
// Given a feature space (cFrame), an alt list, and a requested number of alts, this
// function returns a new alt list with probabilities for each alternate. The version
// called GetProbs in this file does the lookup by prototype number, whereas this version
// does lookups by code point (like the code in Tsunami). Note that the alt list passed
// in will get modified.
int GetProbsTsunami(
    int cFrame,
    ALT_LIST *pAltList,
    int maxAlts,
    RECOG_ALT *pRAlts,
    CHARSET *pCS
) {
    unsigned int cAlt;
    int ii;
    int iDest = 0;
    PROB_ENTRY *pEntries, *pEntriesStart, *pEntriesEnd;
    PROB_ALT *pAlts, *pAltsStart, *pAltsEnd;

    // If we didn't get any alternates, return an empty list.
    if (pAltList->cAlt == 0) {
        return 0;
    }

    // If the probability table was not loaded, just return the top one candidate.
    // This is useful for training the prob table.
    if (g_pProbHeader==NULL) {
        pRAlts[0].wch=pAltList->awchList[0];
        pRAlts[0].prob=MAX_PROB;
        return 1;
    }

    // ASSERT(1 <= cFrame && cFrame < 30);
    ASSERT(1 <= cFrame);
    if (cFrame >= 30) {
        // Can't handle this many strokes.
        goto fakeIt;
    }

    // Hack for U+307A/U+30DA, which probably haven't had their probs set up right
    /* if (LocRunDense2Unicode(&g_locRunInfo,pAltList->awchList[0])==0x307A ||
        LocRunDense2Unicode(&g_locRunInfo,pAltList->awchList[0])==0x30DA) {
        pRAlts[0].wch = LocRunUnicode2Dense(&g_locRunInfo,0x30DA);
        pRAlts[0].prob = MAX_PROB;
        pRAlts[1].wch = LocRunUnicode2Dense(&g_locRunInfo,0x307A);
        pRAlts[1].prob = MAX_PROB;
        return 2;
    } */

    pEntriesStart = EntryPtr(cFrame - 1);
    pEntriesEnd = EntryPtr(cFrame);
    pAltsStart = AltPtr(cFrame - 1);
    pAltsEnd = AltPtr(cFrame);

    // Scan until we find an alt that has a prob list.
    // Normally we stop on the first one, but sometimes there was no training
    // data for a prototype, so it never shows up as a top-one entry.
    for (cAlt = 0; cAlt < pAltList->cAlt; ++cAlt) {
        // Get char to look up.
        // wchar_t wch = LocRunDense2Unicode(&g_locRunInfo,pAltList->awchList[cAlt]);
        wchar_t wch = pAltList->awchList[cAlt];

        pAlts = pAltsStart;
        for (pEntries = pEntriesStart; pEntries < pEntriesEnd; ++pEntries) {
            if (pEntries->wch == wch) {
                // copy results out.
                for (ii = 0; ii < pEntries->cAlts && iDest < maxAlts; ++ii) {
                    if (IsAllowedChar(&g_locRunInfo, pCS, pAlts->wchAlt))
                    {
                        pRAlts[iDest].wch = pAlts->wchAlt;
                        pRAlts[iDest].prob = pAlts->prob;
                        iDest++;
                    }
                    ++pAlts;
                }
                return iDest;
            }
            pAlts += pEntries->cAlts;
        }
    }
fakeIt:
    // Fake something up.
    pRAlts[0].wch = pAltList->awchList[0];
    pRAlts[0].prob = MAX_PROB;
    // fprintf(stderr,"Returning no alts\n");
    // exit(1);
    return 1;
}

#endif // USE_OLD_DATABASES

// End of hacked Prob code.
////////////////////////////////////////////////////////////////////////

BOOL g_fUseJaws;
JAWS_LOAD_INFO g_JawsLoadInfo;
FUGU_LOAD_INFO g_FuguLoadInfo;
SOLE_LOAD_INFO g_SoleLoadInfo;
BOOL g_fUseZillaHound;

#ifdef USE_RESOURCES

#include "res.h"

// Code to load and initialize the databases used.
// They are loaded in this order: otter, zilla, crane/prob or hawk.
BOOL LoadCharRec(HINSTANCE hInstanceDll)
{
    BOOL fError = FALSE;

    if (JawsLoadRes(&g_JawsLoadInfo, hInstanceDll, RESID_JAWS, VOLCANO_RES))
    {
        // Now we need to load the databases that will be combined by this combiner

        // Load the Fugu database
        if (!fError && !FuguLoadRes(&g_FuguLoadInfo, hInstanceDll, RESID_FUGU, VOLCANO_RES, &g_locRunInfo))
        {
            fError = TRUE;
            ASSERT(("Error in FuguLoadRes", FALSE));
        }

        // Load the Sole database
        if (!fError && !SoleLoadRes(&g_SoleLoadInfo, hInstanceDll, RESID_SOLE, VOLCANO_RES, &g_locRunInfo))
        {
            fError = TRUE;
            ASSERT(("Error loading sole", FALSE));
        }
        g_fUseJaws = TRUE;
    }
    else
    {
        // Load the Otter database
        if (!fError && !OtterLoadRes(hInstanceDll, RESID_OTTER, VOLCANO_RES, &g_locRunInfo))
        {
            fError = TRUE;
            ASSERT(("Error in OtterLoadRes", FALSE));
        }
        g_fUseJaws = FALSE;
    }

#if defined(USE_ZILLA) || defined(USE_ZILLAHOUND)
    // Load the Zilla database
    if (!fError && !ZillaLoadResource(
        hInstanceDll, RESID_ZILLA, VOLCANO_RES, RESID_COSTCALC,
        VOLCANO_RES, RESID_GEOSTAT, VOLCANO_RES, &g_locRunInfo
    )) {
        fError = TRUE;
        ASSERT(("Error in ZillaLoadResource", FALSE));
    }
#endif

#if defined(USE_HOUND)
    // Load the Hound database (Hound only, require it to load)
    if (!fError && !HoundLoadRes(hInstanceDll, RESID_HOUND, VOLCANO_RES, &g_locRunInfo)) {
        fError = TRUE;
        ASSERT(("Error in HoundLoadRes", FALSE));
    }
#endif

    g_fUseZillaHound = FALSE;
#if defined(USE_ZILLAHOUND)
    if (!fError) {
        // Load the Hound & Hound-Zilla databases (This is optional).
        if (HoundLoadRes(hInstanceDll, RESID_HOUND, VOLCANO_RES, &g_locRunInfo)) {
            if (ZillaHoundLoadRes(hInstanceDll, RESID_ZILLA_HOUND, VOLCANO_RES)) {
                g_fUseZillaHound = TRUE;
            }
        }
    }
#endif

    // Load the Hawk database.
#ifndef USE_OLD_DATABASES
    if (!fError && !HawkLoadRes(
        hInstanceDll, RESID_HAWK, VOLCANO_RES, &g_locRunInfo
    )) {
        fError = TRUE;
        ASSERT(("Error in HawkLoadRes", FALSE));
    }
#else
    if (!fError && !CraneLoadRes(hInstanceDll,RESID_CRANE,VOLCANO_RES,&g_locRunInfo)) {
        fError=TRUE;
        ASSERT(("Error in CraneLoadRes", FALSE));
    }

    // Load hack probability code until we switch over to hawk.
    // Use hawk's resID so we don't have to create an extra one.
#if !defined(WINCE) && !defined(FAKE_WINCE)
    if (!fError && !ProbLoadRes(
        hInstanceDll, RESID_HAWK, VOLCANO_RES
    )) {
        // Failing to load this is no longer an error,
        // just fall back on the WinCE method.
        // fError = TRUE;
        // ASSERT(("Error in ProbLoadRes", FALSE));
    }
#endif
#endif

    // Did everything load correctly?
    if (fError) {
        // JBENN: If the databases can ever be unloaded, this is
        // a place they need to be.

        // JBENN: FIXME: Set correct error code based on what really went wrong.
        SetLastError(ERROR_RESOURCE_NAME_NOT_FOUND);
        //SetLastError(ERROR_RESOURCE_DATA_NOT_FOUND);
        //SetLastError(ERROR_RESOURCE_TYPE_NOT_FOUND);
        //SetLastError(ERROR_OUTOFMEMORY);

        return FALSE;
    }

    return TRUE;
}

// Code to unload the databases used.
BOOL
UnloadCharRec()
{
    BOOL retVal;

    retVal = TRUE;

    // Free hound up.
# if defined(USE_HOUND)
    if (!HoundUnLoadRes())
    {
        retVal = FALSE;
    }
# endif

# if defined(USE_ZILLAHOUND)
    if (g_fUseZillaHound && !HoundUnLoadRes())
    {
        retVal = FALSE;
    }
# endif

    if (!ZillaUnloadResource())
    {
        retVal = FALSE;
    }

    return retVal;
}

# else

// Global load information specific to loading from files.
#if defined(USE_OTTER) || defined(USE_OTTERFUGU)
OTTER_LOAD_INFO g_OtterLoadInfo;
#endif
#if defined(USE_HOUND) || defined(USE_ZILLAHOUND)
LOAD_INFO g_HoundLoadInfo;
#endif
#ifdef USE_OLD_DATABASES
LOAD_INFO g_ProbLoadInfo;
CRANE_LOAD_INFO g_CraneLoadInfo;
#else
LOAD_INFO g_HawkLoadInfo;
#endif

// Code to load and initialize the databases used.
BOOL LoadCharRec(wchar_t *pPath)
{
    BOOL fError = FALSE;

    if (JawsLoadFile(&g_JawsLoadInfo, pPath))
    {
        // Load the Fugu database
        if (!fError && !FuguLoadFile(&g_FuguLoadInfo, pPath, &g_locRunInfo)) {
            fError = TRUE;
            ASSERT(("Error in FuguLoadFile", FALSE));
        }

        // Load the Sole database
        if (!fError && !SoleLoadFile(&g_SoleLoadInfo, pPath, &g_locRunInfo)) {
            fError = TRUE;
            ASSERT(("Error in SoleLoadFile", FALSE));
        }
        g_fUseJaws = TRUE;
    }
    else
    {
        // Load the Otter database
        if (!fError && !OtterLoadFile(&g_locRunInfo, &g_OtterLoadInfo, pPath)) {
            fError = TRUE;
            ASSERT(("Error in OtterLoadFile", FALSE));
        }
        g_fUseJaws = FALSE;
    }

#if defined(USE_ZILLA) || defined(USE_ZILLAHOUND)
    // Load the Zilla database
    if (!fError && !ZillaLoadFile(&g_locRunInfo, pPath, TRUE)) {
        fError = TRUE;
        ASSERT(("Error in ZillaLoadFile", FALSE));
    }
#endif

#if defined(USE_HOUND)
    // Load the Hound database (Hound only, require it to load)
    if (!fError && !HoundLoadFile(&g_locRunInfo, &g_HoundLoadInfo, pPath)) {
        fError = TRUE;
        ASSERT(("Error in HoundLoadFile", FALSE));
    }
#endif

    g_fUseZillaHound = FALSE;
#if defined(USE_ZILLAHOUND)
    if (!fError) {
        // Load the Hound & Hound-Zilla databases (This is optional).
        if (HoundLoadFile(&g_locRunInfo, &g_HoundLoadInfo, pPath)) {
            if (ZillaHoundLoadFile(pPath)) {
                g_fUseZillaHound = TRUE;
            }
            else
            {
# ifndef TRAIN_ZILLA_HOUND_COMBINER
                HoundUnLoadFile(&g_HoundLoadInfo);
# endif
            }
        }
    }
#endif

#ifndef USE_OLD_DATABASES
    // Load the Hawk database.
    if (!fError && !HawkLoadFile(&g_locRunInfo, &g_HawkLoadInfo, pPath)) {
        fError = TRUE;
        ASSERT(("Error in HawkLoadFile", FALSE));
    }

#else
#if !defined(WINCE) && !defined(FAKE_WINCE)
    // Load hack probability code until we switch over to hawk.
    if (!fError && !ProbLoadFile(pPath, &g_ProbLoadInfo)) {
        // Failing to load this is no longer an error,
        // just fall back on the WinCE method.
        // fError = TRUE;
        // ASSERT(("Error in ProbLoadFile", FALSE));
    }
#endif
    if (!fError && !CraneLoadFile(&g_locRunInfo,&g_CraneLoadInfo, pPath)) {
        fError = TRUE;
        ASSERT(("Error in CraneLoadFile", FALSE));
    }
#endif

    // Did everything load correctly?
    if (fError) {
        // JBENN: If the databases can ever be unloaded, this is
        // a place they need to be.

        // JBENN: FIXME: Set correct error code based on what really went wrong.
        SetLastError(ERROR_RESOURCE_NAME_NOT_FOUND);
        //SetLastError(ERROR_RESOURCE_DATA_NOT_FOUND);
        //SetLastError(ERROR_RESOURCE_TYPE_NOT_FOUND);
        //SetLastError(ERROR_OUTOFMEMORY);

        return FALSE;
    }

    return TRUE;
}

// Code to unload the databases used.
BOOL
UnloadCharRec()
{
    BOOL ok = TRUE;
    if (g_fUseJaws)
    {
        if (!SoleUnloadFile(&g_SoleLoadInfo)) ok = FALSE;
        if (!FuguUnLoadFile(&g_FuguLoadInfo)) ok = FALSE;
        if (!JawsUnloadFile(&g_JawsLoadInfo)) ok = FALSE;
    }
    else
    {
        if (!OtterUnLoadFile(&g_OtterLoadInfo)) ok = FALSE;
    }

# if defined(USE_HOUND)
    if (!HoundUnLoadFile(&g_HoundLoadInfo))
    {
        ok = FALSE;
    }
# endif

# if defined(USE_ZILLAHOUND)
    if (g_fUseZillaHound)
    {
        if (!ZillaHoundUnloadFile())
        {
            ok = FALSE;
        }
        if (!HoundUnLoadFile(&g_HoundLoadInfo))
        {
            ok = FALSE;
        }
    }
# endif

    if (!ZillaUnLoadFile()) ok = FALSE;
# ifdef USE_OLD_DATABASES
    if (!CraneUnLoadFile(&g_CraneLoadInfo)) ok = FALSE;
# if !defined(WINCE) && !defined(FAKE_WINCE)
    if (g_pProbHeader != NULL && !ProbUnLoadFile(&g_ProbLoadInfo)) ok = FALSE;
# endif
# else // USE_OLD_DATABASES
    if (!HawkUnLoadFile(&g_HawkLoadInfo)) ok = FALSE;
# endif // USE_OLD_DATABASES
    return ok;
}

#endif

// Limit on strokes that can be processed by a recognizer. Since
// Zilla ignores anything beyond 29 strokes, it is safe to ignore
// any extra.
#define MAX_STOKES_PROCESS 30
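// Note: this cap is consistent with the cFrame >= 30 guard in GetProbsTsunami
// above, which falls back to returning a single faked alternate for characters
// with 30 or more strokes.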

POINT *DupPoints(POINT *pOldPoints, int nPoints);
GLYPH *GlyphFromStrokes(UINT cStrokes, STROKE *pStrokes);


#ifndef USE_RESOURCES
// Build a copy of the glyph structure.
GLYPH *CopyGlyph(GLYPH *pOldGlyph)
{
    GLYPH *pGlyph = NULL, *pLastGlyph = NULL;

    // Convert strokes to GLYPHs and FRAMEs so that we can call the
    // old code.
    while (pOldGlyph != NULL) {
        GLYPH *pGlyphCur;

        // Alloc glyph.
        pGlyphCur = NewGLYPH();
        if (!pGlyphCur) {
            goto error;
        }

        // Add to list, and alloc frame
        if (pLastGlyph != NULL) {
            pLastGlyph->next = pGlyphCur;
            pLastGlyph = pGlyphCur;
        } else {
            pGlyph = pGlyphCur;
            pLastGlyph = pGlyphCur;
        }
        pGlyphCur->next = NULL;
        pGlyphCur->frame = NewFRAME();
        if (!pGlyphCur->frame) {
            goto error;
        }

        // Fill in frame. We just fill in what we need, and ignore
        // fields that are not used by Otter and Zilla, or that they set themselves.
        pGlyphCur->frame->info.cPnt = pOldGlyph->frame->info.cPnt;
        pGlyphCur->frame->info.wPdk = pOldGlyph->frame->info.wPdk;
        pGlyphCur->frame->rgrawxy = DupPoints(pOldGlyph->frame->rgrawxy, pOldGlyph->frame->info.cPnt);
        pGlyphCur->frame->rect = pOldGlyph->frame->rect;
        pGlyphCur->frame->iframe = pOldGlyph->frame->iframe;

        if (pGlyphCur->frame->rgrawxy == NULL) {
            goto error;
        }

        pOldGlyph = pOldGlyph->next;
    }

    return pGlyph;

error:
    // Cleanup glyphs on error.
    if (pGlyph != NULL) {
        DestroyFramesGLYPH(pGlyph);
        DestroyGLYPH(pGlyph);
    }
    return NULL;
}
#endif // !USE_RESOURCES

#ifdef USE_OLD_DATABASES
/******************************Public*Routine******************************\
* AdHocRuleCost
*
* Because of character folding and the inability of the shape matchers
* to distinguish between a cluster that 1000 samples map to versus 1 point
* mapping to it, we have a few hard rules we throw in to fix obvious
* problems.
*
* History:
*  11-Jul-1995 -by- Patrick Haluptzok patrickh
* Wrote it.
\**************************************************************************/

float AdHocRuleCost(int cStrokes, wchar_t dch, VOLCANO_WEIGHTS *pScores)
{
#ifdef DISABLE_HEURISTICS
    return 0;
#else
    wchar_t wch;
    int cFrame;

    // Get character and number of strokes. Note we need character in Unicode
    // so that we can compare with constant character codes.

    // ASSUMPTION: SYM_UNKNOWN should be the only sym if it's present.
    // So there aren't any alternatives that could get a "better" cost,
    // so it probably doesn't really matter what cost we return here.
    if (dch == SYM_UNKNOWN)
    {
        return 0;
    }

    wch = LocRunDense2Unicode(&g_locRunInfo, dch);
    cFrame = cStrokes;

    // Check for 0 (2 strokes), penalize all circle shapes
    // except 0 when 2 strokes occur.
    if (cFrame >= 2)
    {
        // 0x824f is the 0 that we don't want to penalize.
        // All other circle shapes are penalized.
        if ((wch == 0x006F) ||
            (wch == 0x004F) ||
            (wch == 0x00B0) ||
            (wch == 0x3002) ||
            (wch == 0x3007)
           )
        {
            pScores->afl[VTUNE_ADHOC_CIRCLE] = -1;
            return -g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_CIRCLE];
        }
    }

    // Check for 1 stroke lower-case i and j. No dot is an extra penalty.
    if (cFrame == 1)
    {
        if ((wch == 0x0069) || (wch == 0x006A))
        {
            pScores->afl[VTUNE_ADHOC_IJ] = -1;
            return -g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_IJ];
        }
    }

    return 0;
#endif
}

BOOL Afterburn(ALT_LIST *pAltList, GLYPH *pGlyph, CHARSET *cs, RECT *rGuide, RECT rc)
{
    DRECTS drcs;

    if (pGlyph==NULL || rGuide==NULL)
        return FALSE;

    // Scale and translate the guide box to compute the 'delta rectangle'

    drcs.x = rGuide->left;
    drcs.y = rGuide->top;
    drcs.w = rGuide->right - rGuide->left;
    drcs.h = rGuide->bottom - rGuide->top;

    // Translate, convert to delta form
    rc.left -= drcs.x;
    rc.top -= drcs.y;
    rc.right -= (drcs.x + rc.left);
    rc.bottom -= (drcs.y + rc.top);

    // Scale. We do isotropic scaling and center the shorter dimension.
    if (drcs.w > drcs.h) {
        drcs.x = ((1000 * rc.left) / drcs.w);
        drcs.y = ((1000 * rc.top) / drcs.w) + ((drcs.w - drcs.h) / 2);
        drcs.h = ((1000 * rc.bottom) / drcs.w);
        drcs.w = ((1000 * rc.right) / drcs.w);
    } else {
        drcs.x = ((1000 * rc.left) / drcs.h) + ((drcs.h - drcs.w) / 2);
        drcs.y = ((1000 * rc.top) / drcs.h);
        drcs.w = ((1000 * rc.right) / drcs.h);
        drcs.h = ((1000 * rc.bottom) / drcs.h);
    }

#ifndef DISABLE_HEURISTICS
    return CraneMatch(pAltList, MAX_ALT_LIST, pGlyph, cs, &drcs, 0, &g_locRunInfo);
#else
    return FALSE;
#endif
}

// Hack to get around lack of data for training Crane
BOOL IsFaultyKana(wchar_t wch)
{
    switch (wch) {
//      case 0x3041:
        case 0x3042:
//      case 0x3043:
        case 0x3044:
//      case 0x3045:
        case 0x3046:
//      case 0x3047:
        case 0x3048:
//      case 0x3049:
        case 0x304A:
//      case 0x30E9:
            return TRUE;
    }
    return FALSE;
}
#endif // USE_OLD_DATABASES

// Sort the alternate list.
// We do a bubble sort. The list is small and we can't use qsort because the data is stored in
// three parallel arrays.
void SortAltListAndTune(ALT_LIST *pAltList, VOLCANO_WEIGHTS *pTuneScore)
{
    int pos1, pos2;
    int limit1, limit2;
    FLOAT * const peScore = pAltList->aeScore;
    wchar_t * const pwchList = pAltList->awchList;

    limit2 = pAltList->cAlt;
    limit1 = limit2 - 1;
    for (pos1 = 0; pos1 < limit1; ++pos1) {
        for (pos2 = pos1 + 1; pos2 < limit2; ++pos2) {
            // Are elements pos1 and pos2 out of order?
            if (peScore[pos1] < peScore[pos2]) {
                FLOAT eTemp;
                wchar_t wchTemp;
                VOLCANO_WEIGHTS weights;

                // Swap scores and swap characters.
                eTemp = peScore[pos1];
                peScore[pos1] = peScore[pos2];
                peScore[pos2] = eTemp;

                wchTemp = pwchList[pos1];
                pwchList[pos1] = pwchList[pos2];
                pwchList[pos2] = wchTemp;

                weights = pTuneScore[pos1];
                pTuneScore[pos1] = pTuneScore[pos2];
                pTuneScore[pos2] = weights;
            }
        }
    }
}
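// Note: the swap condition above fires when the earlier score is the smaller
// one, so the list ends up sorted best-first (highest aeScore first), with the
// parallel pTuneScore entries kept in step with their characters.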

// Call the core recognizer for the given character. Returns the
// number of alternates produced, or -1 if an error occurs.
int CoreRecognizeChar(
    ALT_LIST *pAltList,             // Alt list to be returned
    int cAlt,                       // Max number of alternates
    GLYPH **ppGlyph,                // Character to recognize (which may be modified)
    int nRealStrokes,               // Real stroke count for abort processing
    RECT *pGuideBox,                // Guide box (for partial mode)
    RECOG_SETTINGS *pRecogSettings, // Partial mode, other settings
    CHARSET *pCS,                   // ALCs
    int *piRecognizer,              // Returns the VOLCANO_CONFIG_* constant for the recognizer used
    int *piSpace)                   // The space number in that recognizer
{
    int iRet = -1;
    int iRecognizer = VOLCANO_CONFIG_NONE;
    int nStrokes = CframeGLYPH(*ppGlyph);

    if (nStrokes > VOLCANO_CONFIG_MAX_STROKE_COUNT) nStrokes = VOLCANO_CONFIG_MAX_STROKE_COUNT;
    if (pRecogSettings->partialMode) nStrokes = 0;
    iRecognizer = g_latticeConfigInfo.iRecognizers[nStrokes];

    *piRecognizer = iRecognizer;
    *piSpace = -1;

    pAltList->cAlt = 0;

    // Call the selected recognizer
    switch (iRecognizer)
    {
        case VOLCANO_CONFIG_OTTER:
            if (g_fUseJaws)
            {
                iRet = JawsMatch(&g_JawsLoadInfo, &g_FuguLoadInfo, &g_SoleLoadInfo,
                                 pAltList, cAlt, *ppGlyph, pGuideBox, pCS, &g_locRunInfo);
                *piSpace = nStrokes;
            }
            else
            {
                iRet = OtterMatch2(pAltList, cAlt, *ppGlyph, pCS, &g_locRunInfo, piSpace);

                // Other experiments
                // iRet = FuguMatch(&g_FuguLoadInfo.fugu, pAltList, cAlt, *ppGlyph, NULL /*pGuideBox*/, pCS, &g_locRunInfo);
                // iRet = SoleMatch(pAltList, cAlt, *ppGlyph, pGuideBox, pCS, &g_locRunInfo);
                // *piSpace = nStrokes;
            }
            break;

        case VOLCANO_CONFIG_ZILLA:
            iRet = ZillaMatch(pAltList, cAlt, ppGlyph, pCS, g_vtuneInfo.pTune->flZillaGeo,
                              (pRecogSettings->partialMode ? pRecogSettings->pAbort : NULL),
                              nRealStrokes, pRecogSettings->partialMode, pGuideBox);

            // For Zilla, the space number is the feature count. To make them disjoint from the
            // Otter spaces, add on the maximum number of Otter spaces.
            *piSpace = CframeGLYPH(*ppGlyph) + OTTER_NUM_SPACES;

            // Here you can change the iRecognizer that is returned to indicate that the Hound/Zilla
            // combiner ran, instead of just Zilla alone. That way tuning will know to use a different
            // weighting parameter.
            break;

        default:
            // No recognizer available for this stroke count
            iRet = -1;
            break;
    }

    return iRet;
}

// Allocate a cache for the recognizer results.
void *AllocateRecognizerCache()
{
    CACHE *pCache = (CACHE *) ExternAlloc(sizeof(CACHE));
    if (pCache == NULL)
    {
        return NULL;
    }
    pCache->nStrokes = 0;
    pCache->pStrokes = NULL;
    return pCache;
}

// Free up a cache for the recognizer results.
void FreeRecognizerCache(void *pvCache)
{
    CACHE *pCache = (CACHE *) pvCache;
    CACHE_ENTRY *pEntry;
    int iStroke;
    if (pvCache == NULL)
    {
        return;
    }
    for (iStroke = 0; iStroke < pCache->nStrokes; iStroke++)
    {
        pEntry = pCache->pStrokes[iStroke];
        while (pEntry != NULL)
        {
            CACHE_ENTRY *pNext = pEntry->pNext;
            ExternFree(pEntry);
            pEntry = pNext;
        }
    }
    ExternFree(pCache->pStrokes);
    ExternFree(pCache);
}

// Look for results for a given range of strokes, return the recognizer and its
// alternate list.
ALT_LIST *LookupRecognizerCache(void *pvCache, int iStroke, int nStrokes, int *piRecognizer)
{
    CACHE *pCache = (CACHE *) pvCache;
    CACHE_ENTRY *pEntry;
    if (pCache == NULL || iStroke >= pCache->nStrokes)
    {
        return NULL;
    }
    // For the given ending stroke, look for a result for the right number of strokes
    pEntry = pCache->pStrokes[iStroke];
    while (pEntry != NULL && pEntry->nStrokes != nStrokes)
    {
        pEntry = pEntry->pNext;
    }
    // If not found, return nothing.
    if (pEntry == NULL)
    {
        return NULL;
    }
    // Otherwise return the cached results.
    *piRecognizer = pEntry->iRecognizer;
    return &(pEntry->alts);
}

// Add the alternate list to the cache.
void AddRecognizerCache(void *pvCache, int iStroke, int nStrokes, int iRecognizer, ALT_LIST *pAlts)
{
    CACHE *pCache = (CACHE *) pvCache;
    CACHE_ENTRY *pEntry;
    // If no cache, then exit
    if (pCache == NULL)
    {
        return;
    }
    // If the cache is currently too small, then allocate more space for it.
    if (iStroke >= pCache->nStrokes)
    {
        int i;
        int nStrokesNew = max(10, (iStroke + 1) * 2);
        CACHE_ENTRY **pStrokesNew = (CACHE_ENTRY **) ExternRealloc(pCache->pStrokes, sizeof(CACHE_ENTRY *) * nStrokesNew);
        if (pStrokesNew == NULL)
        {
            // If the allocation failed, just continue with the current cache size
            return;
        }
        // Initialize the memory
        for (i = pCache->nStrokes; i < nStrokesNew; i++)
        {
            pStrokesNew[i] = NULL;
        }
        pCache->pStrokes = pStrokesNew;
        pCache->nStrokes = nStrokesNew;
    }
    // If we got here, then add the entry to the cache
    pEntry = (CACHE_ENTRY *) ExternAlloc(sizeof(CACHE_ENTRY));
    if (pEntry == NULL)
    {
        return;
    }
    pEntry->nStrokes = nStrokes;
    pEntry->iRecognizer = iRecognizer;
    pEntry->alts = *pAlts;
    pEntry->pNext = pCache->pStrokes[iStroke];
    pCache->pStrokes[iStroke] = pEntry;
}
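// Note: the intended usage pattern, as seen in RecognizeCharInsurance below, is
// to call LookupRecognizerCache(pvCache, iStroke, cStrokes, &iRecognizer) first
// and AddRecognizerCache with the freshly computed alt list only on a miss;
// entries are keyed by the index of the character's last stroke plus the number
// of strokes it spans.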

#ifdef USE_OLD_DATABASES
// This call is roughly the equivalent of the RecognizeChar call below, but instead of
// returning probabilities, it returns an alternate list with scores. It uses the old Tsunami
// recognition procedure, with otter and zilla returning code points, followed by adhoc rules,
// language model, baseline/height scores, and crane. The result of this is used by RecognizeChar
// to look up the old probability table.
INT RecognizeCharInsurance(
    RECOG_SETTINGS *pRecogSettings, // In: Settings for recognizers.
    UINT cStrokes,                  // In: Number of strokes to process.
    UINT cRealStrokes,              // In: Number of strokes before merging
    STROKE *pStrokes,               // In: Array of strokes to process.
    FLOAT *pProbIsChar,             // Out: probability of being valid char.
    UINT maxAlts,                   // In: Size of alts array supplied.
    RECOG_ALT *pProbAlts,           // Out: alternate list matched with probabilities.
    int *pnProbAlts,
    RECOG_ALT *pScoreAlts,          // Out: alternate list matched with scores
    int *pnScoreAlts,
    RECT *pGuideBox,                // In: Guide box for this ink.
    wchar_t dchContext,             // In: Context
    int *pSpace,                    // Out: Space number used for matching
    VOLCANO_WEIGHTS *pTuneScore,    // Out: score components
    BOOL fStringMode,               // In: Whether or not the recognizer is in string mode
    BOOL fProbMode,                 // In: Whether the recognizer is in probability mode
    void *pvCache,                  // In/Out: Pointer to cache, or NULL if not being used
    int iStroke                     // In: Index of last stroke of character
) {
    ALT_LIST *pCacheResult = NULL;
    BOXINFO box;
    RECT bbox;
    int iAlt;
    GLYPH *pGlyph;
    ALT_LIST altList;
    CHARSET charSet;        // Mask used for core recognizers
    CHARSET charSetMask;    // Mask used for probability table lookup
    BOOL fCraneBonus = FALSE;
    int iRecognizer;

    // Convert strokes to GLYPHs and FRAMEs so that we can call the
    // old code.

    pGlyph = GlyphFromStrokes(cStrokes, pStrokes);
    if (!pGlyph)
    {
        return -1;
    }

    // Run otter or zilla as needed.
    altList.cAlt = 0;
    charSetMask.recmask = pRecogSettings->alcValid;
    charSetMask.recmaskPriority = pRecogSettings->alcPriority;
    charSetMask.pbAllowedChars = pRecogSettings->pbAllowedChars;
    charSetMask.pbPriorityChars = pRecogSettings->pbPriorityChars;
    if (fProbMode)
    {
        // In probability mode, don't mask off the core recognizers
        charSet.recmask = 0xFFFFFFFF;
        charSet.recmaskPriority = 0;
        charSet.pbAllowedChars = NULL;
        charSet.pbPriorityChars = NULL;
    }
    else
    {
        // In score mode, mask off the core recognizers
        charSet = charSetMask;
    }

    // Get the bounding box for the character
    GetRectGLYPH(pGlyph,&bbox);

    // Try going to the cache
    pCacheResult = LookupRecognizerCache(pvCache, iStroke, cStrokes, &iRecognizer);
    if (pCacheResult != NULL)
    {
        // If it was the Zilla recognizer before, we need to run featurization because
        // of its side-effect of fragmenting the strokes, which crane needs.
        if (iRecognizer == VOLCANO_CONFIG_ZILLA)
        {
            BIGPRIM rgprim[CPRIMMAX];
            BYTE aSampleVector[29 * 4];
            ZillaFeaturize(&pGlyph, rgprim, aSampleVector);
        }
        altList = *pCacheResult;
    }
    else
    {
        // Invoke Otter or Zilla or any other recognizer that has been specified in the configuration
        CoreRecognizeChar(&altList, MAX_ALT_LIST, &pGlyph, cRealStrokes, pGuideBox, pRecogSettings, &charSet, &iRecognizer, pSpace);

        // Add it to the cache, since it isn't there already.
        AddRecognizerCache(pvCache, iStroke, cStrokes, iRecognizer, &altList);
    }

    // If we're doing an experiment to simulate an optimal otter or zilla,
    // replace the real alt list with a fake one.
#ifdef OPTIMAL_OTTER_ZILLA
    {
        wchar_t dch;
        altList.cAlt = 1;
        altList.aeScore[0] = 0;
        {
            FILE *f = fopen("c:/answer.txt", "r");
            fscanf(f, "%hx", &(altList.awchList[0]));
            fclose(f);
        }
        dch = LocRunUnicode2Dense(&g_locRunInfo, altList.awchList[0]);
        if (dch != LOC_TRAIN_NO_DENSE_CODE) {
            wchar_t fdch = LocRunDense2Folded(&g_locRunInfo, dch);
            if (fdch != 0) dch = fdch;
            altList.awchList[0] = dch;
        } else {
            altList.cAlt = 0;
        }
    }
#endif

    // Get our rough approximation of the probability that this is
    // actually a character. If zero alternates are returned, then
    // set the space number to -1 as an error flag.
    if (altList.cAlt == 0) {
        *pSpace = -1;
        *pProbIsChar = 0;

        *pnProbAlts = 0;
        *pnScoreAlts = 0;
        goto cleanup;
    }

    // Unfold anything in the alt list which needs it.
    UnfoldCodes(&altList, &charSet);

    // If we couldn't load the probability table, then use the
    // WinCE method to get probabilities.
    if (g_pProbHeader == NULL)
    {
        *pnProbAlts = GetProbsTsunamiFixedTable(cStrokes, &altList, maxAlts, pProbAlts, &charSetMask);
    }

    // Apply crane, if we have a guide for it to use and we are not in partial mode
    if (pRecogSettings->partialMode == HWX_PARTIAL_ALL && pGuideBox != NULL && altList.cAlt > 0) {
        fCraneBonus = Afterburn(&altList, pGlyph, &charSet, pGuideBox, bbox);
        // Hack to bypass crane if otter returned a troublesome kana character
        if (IsFaultyKana(LocRunDense2Unicode(&g_locRunInfo,altList.awchList[0]))) {
            fCraneBonus = FALSE;
        }
    }

    // Save away the scores for the alternates, then apply the weight for the particular
    // recognizer used. Then add in the crane bonus/penalty and the adhoc rules.
    for (iAlt=0; iAlt<(int)altList.cAlt; iAlt++)
    {
        int iParam = (fStringMode ? VTUNE_STRING_CORE : VTUNE_CHAR_CORE) + iRecognizer;
        pTuneScore[iAlt].afl[iParam] = altList.aeScore[iAlt];
        altList.aeScore[iAlt] *= g_vtuneInfo.pTune->weights.afl[iParam];

        // Crane is now implemented as a penalty rather than a bonus. This means
        // all alternates after the first one get a penalty, and even the first one
        // gets a penalty if no crane bonus is applied.
        if (iAlt > 0 || !fCraneBonus)
        {
            iParam = fStringMode ? VTUNE_STRING_CRANE : VTUNE_CHAR_CRANE;
            pTuneScore[iAlt].afl[iParam] = -1;
            altList.aeScore[iAlt] -= g_vtuneInfo.pTune->weights.afl[iParam];
        }

        // Add adhoc penalties for the one stroke i and j and two stroke circle shapes
        if (pRecogSettings->partialMode == HWX_PARTIAL_ALL)
        {
            altList.aeScore[iAlt] += AdHocRuleCost(cStrokes, altList.awchList[iAlt], pTuneScore + iAlt);
        }
    }

    // Sort the alternates out.
    SortAltListAndTune(&altList, pTuneScore);

    // Copy the score-based alts to the output
    for (iAlt = 0; iAlt < (int)altList.cAlt && iAlt < (int)maxAlts && iAlt < (int)MAX_ALT_LIST; ++iAlt)
    {
        pScoreAlts[iAlt].wch = altList.awchList[iAlt];
        pScoreAlts[iAlt].prob = altList.aeScore[iAlt];
    }
    *pnScoreAlts = altList.cAlt;

    // Re-score the alternates using the old weightings in the
    // TTune structure, so that prob table lookup will be weighting
    // independent.
    for (iAlt = 0; iAlt < (int)altList.cAlt; ++iAlt)
    {
        altList.aeScore[iAlt] =
            g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_IJ] * pTuneScore[iAlt].afl[VTUNE_ADHOC_IJ] +
            g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_CIRCLE] * pTuneScore[iAlt].afl[VTUNE_ADHOC_CIRCLE] +
            (cStrokes > 2 ? g_ttuneInfo.pTTuneCosts->ZillaChar.CARTAddWeight :
                            g_ttuneInfo.pTTuneCosts->OtterChar.CARTAddWeight)
                * pTuneScore[iAlt].afl[fStringMode ? VTUNE_STRING_CRANE : VTUNE_CHAR_CRANE] +
            pTuneScore[iAlt].afl[(fStringMode ? VTUNE_STRING_CORE : VTUNE_CHAR_CORE) + iRecognizer];
    }

    // Build up a BOXINFO structure from the guide, for use in the baseline/height scoring
    if (pGuideBox!=NULL) {
        box.size = pGuideBox->bottom - pGuideBox->top;
        box.baseline = pGuideBox->bottom;
        box.xheight = box.size / 2;
        box.midline = box.baseline - box.xheight;
    }

    // For each alternate
    for (iAlt=0; iAlt<(int)altList.cAlt; iAlt++) {
        float cost;
        // Apply baseline/height and language model unigram scores
        if (cStrokes<3) {
            if (pGuideBox!=NULL) {
                cost = BaselineTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box)
                    * g_ttuneInfo.pTTuneCosts->OtterChar.BaseWeight;
                altList.aeScore[iAlt] += cost;

                cost = BaselineBoxCost(altList.awchList[iAlt],bbox,&box)
                    * g_ttuneInfo.pTTuneCosts->OtterChar.BoxBaselineWeight;
                altList.aeScore[iAlt] += cost;

                cost = HeightTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box)
                    * g_ttuneInfo.pTTuneCosts->OtterChar.HeightWeight;
                altList.aeScore[iAlt] += cost;

                cost = HeightBoxCost(altList.awchList[iAlt],bbox,&box)
                    * g_ttuneInfo.pTTuneCosts->OtterChar.BoxHeightWeight;
                altList.aeScore[iAlt] += cost;
            }
            cost = UnigramCost(&g_unigramInfo,altList.awchList[iAlt])
                * g_ttuneInfo.pTTuneCosts->OtterChar.UniWeight;
            altList.aeScore[iAlt] += cost;
        } else {
            if (pGuideBox!=NULL) {
                cost = BaselineTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box)
                    * g_ttuneInfo.pTTuneCosts->ZillaChar.BaseWeight;
                altList.aeScore[iAlt] += cost;

                cost = BaselineBoxCost(altList.awchList[iAlt],bbox,&box)
                    * g_ttuneInfo.pTTuneCosts->ZillaChar.BoxBaselineWeight;
                altList.aeScore[iAlt] += cost;

                cost = HeightTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box)
                    * g_ttuneInfo.pTTuneCosts->ZillaChar.HeightWeight;
                altList.aeScore[iAlt] += cost;

                cost = HeightBoxCost(altList.awchList[iAlt],bbox,&box)
                    * g_ttuneInfo.pTTuneCosts->ZillaChar.BoxHeightWeight;
                altList.aeScore[iAlt] += cost;

            }
            cost = UnigramCost(&g_unigramInfo,altList.awchList[iAlt])
                * g_ttuneInfo.pTTuneCosts->ZillaChar.UniWeight;
            altList.aeScore[iAlt] += cost;

            // Zilla scores get fudged
            altList.aeScore[iAlt] *= g_ttuneInfo.pTTuneCosts->ZillaStrFudge;
        }

        // If context was available for this character, then use the bigram/class bigram scores
        if (dchContext != SYM_UNKNOWN && dchContext != 0) {
#if !defined(WINCE) && !defined(FAKE_WINCE)
            cost = BigramTransitionCost(&g_locRunInfo,&g_bigramInfo,dchContext,altList.awchList[iAlt])
                * g_ttuneInfo.pTTuneCosts->BiWeight;
            altList.aeScore[iAlt] += cost;
#endif

            cost = ClassBigramTransitionCost(&g_locRunInfo,&g_classBigramInfo,dchContext,altList.awchList[iAlt])
                * g_ttuneInfo.pTTuneCosts->BiClassWeight;
            altList.aeScore[iAlt] += cost;
        }
    }

    // Sort the resulting alternates
    SortAltList(&altList);

    // This is a temporary call to get probs directly, until we have Hawk.
    if (g_pProbHeader != NULL)
    {
        *pnProbAlts = GetProbsTsunami(cStrokes, &altList, maxAlts, pProbAlts, &charSetMask);
    }
#if 0
    {
        FILE *f=fopen("c:/temp/prob.log","a+");
        fprintf(f,"%04X %g -> %04X %g\n", altList.awchList[0], altList.aeScore[0],
                pProbAlts[0].wch, pProbAlts[0].prob);
        fclose(f);
    }
#endif

//#define TEST_FOR_PATRICKH
#ifdef TEST_FOR_PATRICKH
    {
        int i;
        for (i=0; i<*pnProbAlts && i<(int)altList.cAlt; i++)
            pProbAlts[i].wch = altList.awchList[i];
        *pnProbAlts = i;
    }
#endif

cleanup:
    // Free the glyph structure.
    DestroyFramesGLYPH(pGlyph);
    DestroyGLYPH(pGlyph);

    return *pnProbAlts;
}

#else

// Version of Afterburn to call Hawk.
int Afterburn(
    ALT_LIST *pAltList,     // Input used to select correct CART tree
    GLYPH *pGlyph,
    CHARSET *cs,
    RECT *rGuide,
    int otterSpace,
    UINT maxAlts,           // Size of alts array supplied.
    RECOG_ALT *pAlts        // Out: alternate list matched.
) {
    UINT ii;
    UINT iDest;
    // UINT jj, kk;
    BASICINFO basicInfo;
    FEATINFO featInfo;
    HANDLE hCartTree;
    QALT aQAlt[MAX_RECOG_ALTS];
    UINT cQAlt;
#if 0
    double aWeights[MAX_ALT_LIST];
    double fSum;
    double offset;
    FILE *pFile;
#endif

    RECT bbox;
    DRECTS drcs;

    if (pGlyph == NULL) {
        return -1;
    }

    // Get the bounding box for the character
    GetRectGLYPH(pGlyph, &bbox);

    // Scale and translate the guide box to compute the 'delta rectangle'
    if (rGuide == NULL) {
        // No guide given. This is the current assumption.
        drcs.x = 0;
        drcs.y = 0;
        drcs.w = 1000;
        drcs.h = 1000;
    } else {
        // Actually got a guide, pass it on. Current code ignores the
        // guide, but may add it back so don't lose this code path.
        drcs.x = rGuide->left;
        drcs.y = rGuide->top;
        drcs.w = rGuide->right - rGuide->left;
        drcs.h = rGuide->bottom - rGuide->top;
    }

    // Translate, convert to delta form
    bbox.left -= drcs.x;
    bbox.top -= drcs.y;
    bbox.right -= (drcs.x + bbox.left);
    bbox.bottom -= (drcs.y + bbox.top);

    // Scale. We do isotropic scaling and center the shorter dimension.
    if (drcs.w > drcs.h) {
        drcs.x = ((1000 * bbox.left) / drcs.w);
        drcs.y = ((1000 * bbox.top) / drcs.w) + ((drcs.w - drcs.h) / 2);
        drcs.h = ((1000 * bbox.bottom) / drcs.w);
        drcs.w = ((1000 * bbox.right) / drcs.w);
    } else {
        drcs.x = ((1000 * bbox.left) / drcs.h) + ((drcs.h - drcs.w) / 2);
        drcs.y = ((1000 * bbox.top) / drcs.h);
        drcs.w = ((1000 * bbox.right) / drcs.h);
        drcs.h = ((1000 * bbox.bottom) / drcs.h);
    }

    // Fill in basic info.
    // basicInfo.cStrk -- Filled in by MakeFeatures.
    basicInfo.cSpace = (short)otterSpace;
    basicInfo.drcs = drcs;

    // Fill in feature info.
    if (!MakeFeatures(&basicInfo, &featInfo, pGlyph)) {
        return -1;
    }

#if 1
    // Find cart tree
    hCartTree = (HANDLE)0;
    for (ii = 0; !hCartTree && ii < pAltList->cAlt; ++ii) {
        hCartTree = HawkFindTree(basicInfo.cStrk, basicInfo.cSpace, pAltList->awchList[ii]);
    }
    if (!hCartTree) {
        // No cart tree for anything in the alt list!?!?!
        return -1;
    }

    // Do the match.
    //HawkMatch(pAltList, MAX_ALT_LIST, pGlyph, cs, &drcs, eCARTWeight, &g_locRunInfo);
    cQAlt = HawkMatch(&basicInfo, &featInfo, hCartTree, aQAlt);

    // Copy out the alt list, applying the ALC
    iDest = 0;
    for (ii = 0; ii < cQAlt && iDest < maxAlts; ++ii)
    {
        if (IsAllowedChar(&g_locRunInfo, cs, aQAlt[ii].dch))
        {
            pAlts[iDest].wch = aQAlt[ii].dch;
            pAlts[iDest].prob = aQAlt[ii].prob;
            iDest++;
        }
    }
    cQAlt = iDest;
#elif 0

    // Select stroke dependent offset used to compute weights below.
    switch (basicInfo.cStrk) {
        case 1 :  offset = .01; break;
        case 2 :  offset = .05; break;
        default : offset = .05; break;
    }

    // Compute the weighting to apply to each tree's results.
    fSum = 0.0;
    for (ii = 0; ii < pAltList->cAlt; ++ii) {
        double ratio;

        ratio = offset / (offset + pAltList->aeScore[0] - pAltList->aeScore[ii]);
        aWeights[ii] = ratio * ratio * ratio;
        fSum += aWeights[ii];
    }

    // Normalize to sum to one.
    for (ii = 0; ii < pAltList->cAlt; ++ii) {
        aWeights[ii] /= fSum;
    }

    pFile = fopen("AltList.dump", "a");
    fprintf(pFile, "Start Dump:\n");
    // Find each cart tree and add results to list.
    hCartTree = (HANDLE)0;
    cQAlt = 0;
    for (ii = 0; ii < pAltList->cAlt && cQAlt < maxAlts; ++ii) {
        hCartTree = HawkFindTree(basicInfo.cStrk, basicInfo.cSpace, pAltList->awchList[ii]);
        if (hCartTree) {
            UINT cQAltNew;
            SCORE penalty;
            int skipped;

            // Do the match.
            cQAltNew = HawkMatch(&basicInfo, &featInfo, hCartTree, aQAlt);

            // How much can we add?
            if (cQAltNew > maxAlts - cQAlt) {
                cQAltNew = maxAlts - cQAlt;
            }

            // Convert our weight (probability) to a log prob.
            penalty = ProbToScore(aWeights[ii]);

            // Zilla overgenerates prototypes, so look for a different top one from
            // additional trees.
            if (ii > 0 && basicInfo.cStrk >= 3 && aQAlt[0].dch == pAlts[0].wch) {
                continue;
            }

            // Add to list.
            skipped = 0;
            for (jj = 0; jj < cQAltNew; ++jj) {
                SCORE newScore;

                // Check for duplicates in the alternate list. Each individual list has no
                // dups, so we don't have to check them.
                newScore = aQAlt[jj].prob + penalty;
                fprintf(pFile, " %04X:%d->%d", LocRunDense2Unicode(&g_locRunInfo,aQAlt[jj].dch),aQAlt[jj].prob,newScore);
                for (kk = 0; kk < cQAlt; ++kk) {
                    if (aQAlt[jj].dch == pAlts[kk].wch) {
                        ASSERT(pAlts[kk].prob == (float)(int)pAlts[kk].prob);
                        pAlts[kk].prob = ScoreAddProbs((SCORE)pAlts[kk].prob, newScore);
                        ++skipped;
                        goto noAdd;
                    }
                }
                pAlts[jj - skipped + cQAlt].wch = aQAlt[jj].dch;
                pAlts[jj - skipped + cQAlt].prob = (float)newScore;
noAdd:          ;
            }
            fprintf(pFile, "\n");

            cQAlt += cQAltNew - skipped;
        }
    }
    for (kk = 0; kk < cQAlt; ++kk) {
        fprintf(pFile, " %04X:%g", LocRunDense2Unicode(&g_locRunInfo,pAlts[kk].wch),pAlts[kk].prob);
    }
    fprintf(pFile, "\n");
    fprintf(pFile, "End Dump\n");
    fclose(pFile);

#else

    // Select stroke dependent offset used to compute weights below.
    switch (basicInfo.cStrk) {
        case 1 :  offset = 1.0; break;
        case 2 :  offset = 1.0; break;
        default : offset = 1.0; break;
    }

    pFile = fopen("AltList.dump", "a");
    fprintf(pFile, "Start Dump:\n");
    // Find each cart tree and add results to list.
    hCartTree = (HANDLE)0;
    cQAlt = 0;
    for (ii = 0; ii < pAltList->cAlt && cQAlt < maxAlts; ++ii) {
        hCartTree = HawkFindTree(basicInfo.cStrk, basicInfo.cSpace, pAltList->awchList[ii]);
        if (hCartTree) {
            UINT cQAltNew;
            SCORE penalty;
            int skipped;

            // Do the match.
            cQAltNew = HawkMatch(&basicInfo, &featInfo, hCartTree, aQAlt);

            // How much can we add?
            if (cQAltNew > maxAlts - cQAlt) {
                cQAltNew = maxAlts - cQAlt;
            }

            // Convert our weight (probability) to a log prob.
            penalty = (SCORE)((pAltList->aeScore[0] - pAltList->aeScore[ii]) * 2040);

            // Zilla overgenerates prototypes, so look for a different top one from
            // additional trees.
            if (ii > 0 && basicInfo.cStrk >= 3 && aQAlt[0].dch == pAlts[0].wch) {
                continue;
            }

            // Add to list.
            skipped = 0;
            for (jj = 0; jj < cQAltNew; ++jj) {
                SCORE newScore;

                // Check for duplicates in the alternate list. Each individual list has no
                // dups, so we don't have to check them.
                newScore = aQAlt[jj].prob + penalty;
                fprintf(pFile, " %04X:%d->%d", LocRunDense2Unicode(&g_locRunInfo,aQAlt[jj].dch),aQAlt[jj].prob,newScore);
                for (kk = 0; kk < cQAlt; ++kk) {
                    if (aQAlt[jj].dch == pAlts[kk].wch) {
                        ASSERT(pAlts[kk].prob == (float)(int)pAlts[kk].prob);
                        pAlts[kk].prob = ScoreAddProbs((SCORE)pAlts[kk].prob, newScore);
                        ++skipped;
                        goto noAdd;
                    }
                }
                pAlts[jj - skipped + cQAlt].wch = aQAlt[jj].dch;
                pAlts[jj - skipped + cQAlt].prob = (float)newScore;
noAdd:          ;
            }
            fprintf(pFile, "\n");

            cQAlt += cQAltNew - skipped;
        }
    }
    for (kk = 0; kk < cQAlt; ++kk) {
        fprintf(pFile, " %04X:%g", LocRunDense2Unicode(&g_locRunInfo,pAlts[kk].wch),pAlts[kk].prob);
    }
    fprintf(pFile, "\n");
    fprintf(pFile, "End Dump\n");
    fclose(pFile);

#endif

    FreeFeatures(&featInfo);

    return cQAlt;
}

#endif

#ifndef USE_OLD_DATABASES
// Do the recognition.
INT
RecognizeChar(
    RECOG_SETTINGS *pRecogSettings, // Settings for recognizers.
    UINT cStrokes,                  // Number of strokes to process.
    UINT cRealStrokes,              // Number of strokes before merging
    STROKE *pStrokes,               // Array of strokes to process.
    FLOAT *pProbIsChar,             // Out: probability of being valid char.
    UINT maxAlts,                   // Size of alts array supplied.
    RECOG_ALT *pAlts,               // Out: alternate list matched.
    RECT *pGuideBox,                // Guide box for this ink.
    int *pCount
) {
    INT cAlts;
    GLYPH *pGlyph;
    ALT_LIST altList;
    CHARSET charSet;
    int iRecognizer;

    // Convert strokes to GLYPHs and FRAMEs so that we can call the
    // old code.
    pGlyph = GlyphFromStrokes(cStrokes, pStrokes);
    if (!pGlyph) {
        return -1;
    }

    // Run otter or zilla as needed.
    // A possible optimization would be to switch to the proto-matching versions of the match calls.
    altList.cAlt = 0;

    charSet.recmask = 0xFFFFFFFF;
    charSet.recmaskPriority = 0;
    charSet.pbAllowedChars = NULL;
    charSet.pbPriorityChars = NULL;

    // Invoke Otter or Zilla or any other recognizer that has been specified in the configuration
    CoreRecognizeChar(&altList, MAX_ALT_LIST, &pGlyph, cRealStrokes, pGuideBox, pRecogSettings, &charSet, &iRecognizer, pCount);

    charSet.recmask = pRecogSettings->alcValid;
    charSet.recmaskPriority = pRecogSettings->alcPriority;
    charSet.pbAllowedChars = pRecogSettings->pbAllowedChars;
    charSet.pbPriorityChars = pRecogSettings->pbPriorityChars;

    if (pRecogSettings->partialMode != HWX_PARTIAL_ALL) {
        unsigned int ii;

        // Unfold anything in the alt list which needs it.
        UnfoldCodes(&altList, &charSet);

        // Copy over the alt list.
        // Note that we don't have probabilities, and they don't
        // really make sense anyway. However the code that
        // follows will discard items with a prob of zero, so
        // they should be set to something.
        for (ii = 0; ii < maxAlts && ii < altList.cAlt; ++ii) {
            pAlts[ii].wch = altList.awchList[ii];
            pAlts[ii].prob = -altList.aeScore[ii];
        }

        // Free the glyph structure.
        DestroyFramesGLYPH(pGlyph);
        DestroyGLYPH(pGlyph);

        return ii;
    }

    // Get our rough approximation of the probability that this is
    // actually a character.
    *pProbIsChar = altList.aeScore[0];

    // Run Hawk.
#ifndef DISABLE_HEURISTICS
    cAlts = Afterburn(&altList, pGlyph, &charSet, pGuideBox, *pCount, maxAlts, pAlts);
#else
    {
        unsigned int ii;
        UnfoldCodes(&altList, &charSet);
        for (ii = 0; ii < maxAlts && ii < altList.cAlt; ii++)
        {
            pAlts[ii].wch = altList.awchList[ii];
            pAlts[ii].prob = -altList.aeScore[ii];
        }
        cAlts = ii;
    }
#endif

    // Free the glyph structure.
    DestroyFramesGLYPH(pGlyph);
    DestroyGLYPH(pGlyph);

    return cAlts;
}
#endif