windows-nt/Source/XPSP1/NT/drivers/tpg/hwx/hound/src/houndtrain.c
2020-09-26 16:20:57 +08:00

271 lines
6 KiB
C

#include <float.h>
#include <stdlib.h>
#include "common.h"
#include "score.h"
#include "math16.h"
#include "hound.h"
#include "houndp.h"
//// Data structures to hold access information on Hound space. We keep a linked list
//// for each dense code, holding the models for that dense code.
// Length of the table of list heads: one slot per possible dense code value.
#define MAX_DENSE_CODES (64 * 1024)
// List element: describes one model found while scanning the Hound space.
typedef struct tagMODEL_ITEM {
// Start of the model inside the space's model data (points at its label bytes).
BYTE *pModel;
int sizeModel; // Size in bytes, measured from the label to the end of the model.
// Next model recorded for the same dense code; null at the end of the list.
struct tagMODEL_ITEM *pNext;
} MODEL_ITEM;
// Hound space most recently passed to ParseHoundSpace.
HOUND_SPACE *g_pHoundSpace;
// Value returned by MaxHoundModelsPerCP, stored at the end of a successful
// ParseHoundSpace.
int g_maxModelsPerCP;
// Lowest and highest labels for which a model has been recorded.
// ParseHoundSpace resets them to MAX_DENSE_CODES and 0 before scanning, so
// min > max means no model has been recorded.
int g_iMinModelHead;
int g_iMaxModelHead;
// Array to hold head of list for each dense code, indexed by the model label.
MODEL_ITEM *apModelHead[MAX_DENSE_CODES];
// Find the largest number of models recorded for any single code point.
//
// Walks the list heads in the inclusive range
// [g_iMinModelHead, g_iMaxModelHead] and returns the length of the longest
// list. Returns 0 when no list in that range has any entries (including the
// case where the range is empty because min > max).
int MaxHoundModelsPerCP()
{
    int iCode;
    int cMost;

    cMost = 0;

    // g_iMaxModelHead holds the highest label itself, not one past it, so the
    // upper bound must be inclusive or that label's models are never counted.
    for (iCode = g_iMinModelHead; iCode <= g_iMaxModelHead; ++iCode)
    {
        int cModels;
        MODEL_ITEM *pItem;

        // Count models for this code point.
        cModels = 0;
        for (pItem = apModelHead[iCode]; pItem; pItem = pItem->pNext)
        {
            ++cModels;
        }

        // Is it a new max?
        if (cMost < cModels)
        {
            cMost = cModels;
        }
    }

    return cMost;
}
// Parse a Hound space and record the location of each model in it.
//
// Any list items left over from an earlier parse are released first, then the
// list heads are cleared and pSpace is saved in g_pHoundSpace. The model data
// is scanned (after skipping the pair and weight tables) and one MODEL_ITEM is
// appended to the list for each model's label. The items point into
// pSpace->modelData; the model bytes themselves are not copied.
//
// Scanning stops at the first position whose two label bytes are both zero.
//
// Returns TRUE on success. Returns FALSE if a list item cannot be allocated;
// in that case the items recorded so far stay in the lists and
// g_maxModelsPerCP is not updated.
BOOL ParseHoundSpace(HOUND_SPACE *pSpace)
{
    BYTE *pScan;
    int iHead;

    // Release the items built by any earlier parse. Clearing the array alone
    // would orphan them. On the first call every head is still null.
    for (iHead = 0; iHead < MAX_DENSE_CODES; ++iHead)
    {
        MODEL_ITEM *pItem;

        pItem = apModelHead[iHead];
        while (pItem)
        {
            MODEL_ITEM *pFollowing;

            pFollowing = pItem->pNext;
            free(pItem);
            pItem = pFollowing;
        }
    }

    // Initialize limits so we can set them as we go.
    g_iMinModelHead = MAX_DENSE_CODES;
    g_iMaxModelHead = 0;

    // Clear the array.
    memset(apModelHead, 0, sizeof(apModelHead));

    // Save the space pointer.
    g_pHoundSpace = pSpace;

    // Skip tables.
    pScan = g_pHoundSpace->modelData;
    pScan += sizeof(DEPEND_PAIR) * g_pHoundSpace->cPairTable;
    pScan += sizeof(DEPEND_WEIGHT) * g_pHoundSpace->cWeightTable;

    // Scan the data for the models.
    while (pScan[0] != 0x00 || pScan[1] != 0x00)
    {
        UNALIGNED wchar_t *pdchLabel;
        BYTE *pModel;
        wchar_t dchLabel;
        WORD iComponent;
        WORD cComponent;
        MODEL_ITEM *pModelItem;
        MODEL_ITEM **ppScanEnd;

        // Remember where model starts.
        pModel = pScan;

        // Extract the code point; the label occupies the first two bytes.
        pdchLabel = (wchar_t *)pScan;
        dchLabel = *pdchLabel;
        pScan += 2;

        //// Skip the model

        // Extract the number of components in the model (one byte).
        cComponent = *pScan++;
        ASSERT(cComponent <= MAX_HOUND_COMPONENTS);

        // Skip each mixture component...
        for (iComponent = 0; iComponent < cComponent; ++iComponent)
        {
            // Skip component weight & factor. The extra two bytes are only
            // present when the model has more than one component.
            if (cComponent > 1)
            {
                pScan += 2;
            }
            pScan += 2;

            // Skip the diagonal: one byte per feature.
            pScan += pSpace->numFeat;

            // Skip dependencies. The list ends at a zero byte; each entry is
            // a pair code followed by a weight code, and each code takes one
            // byte when its first byte is below the one-byte limit, otherwise
            // two. The second test must read the byte reached after the first
            // advance, so the two statements cannot be merged or reordered.
            while (*pScan != 0)
            {
                pScan += (*pScan < g_pHoundSpace->cPairOneByte) ? 1 : 2;
                pScan += (*pScan < g_pHoundSpace->cWeightOneByte) ? 1 : 2;
            }

            // Step over the terminating zero byte.
            ++pScan;
        }

        //// OK, have model, record it.

        // Allocate space.
        pModelItem = malloc(sizeof *pModelItem);
        if (!pModelItem)
        {
            ASSERT(pModelItem);
            return FALSE;
        }

        // Fill in structure.
        pModelItem->pModel = pModel;
        pModelItem->sizeModel = (int)(pScan - pModel);
        pModelItem->pNext = NULL;

        // Add to end of list, so models keep the order they have in the data.
        ppScanEnd = &apModelHead[dchLabel];
        while (*ppScanEnd)
        {
            ppScanEnd = &((*ppScanEnd)->pNext);
        }
        *ppScanEnd = pModelItem;

        // And keep track of min and max labels.
        if (g_iMinModelHead > dchLabel)
        {
            g_iMinModelHead = dchLabel;
        }
        if (g_iMaxModelHead < dchLabel)
        {
            g_iMaxModelHead = dchLabel;
        }
    }

    // Figure out max models per code point.
    g_maxModelsPerCP = MaxHoundModelsPerCP();

    return TRUE;
}
// Print out each code point supported, the number of models it has, and the
// size of each.
//
// One line is written per code point that has at least one model, in the form
// "+ XXXX count size size ...", where XXXX is the code point in hexadecimal.
// Code points in [g_iMinModelHead, g_iMaxModelHead] with no models are
// skipped.
//
// Returns TRUE on success, FALSE as soon as a write to pFile fails.
BOOL HoundPrintModelList(FILE *pFile)
{
    int iCode;

    // The counter is an int, not a wchar_t: with a 16-bit wchar_t and a
    // highest label of 0xFFFF, incrementing a wchar_t counter would wrap to 0
    // and the inclusive upper bound would never be exceeded.
    for (iCode = g_iMinModelHead; iCode <= g_iMaxModelHead; ++iCode)
    {
        int cModels;
        MODEL_ITEM *pScan;

        // Count the models.
        cModels = 0;
        for (pScan = apModelHead[iCode]; pScan; pScan = pScan->pNext)
        {
            ++cModels;
        }

        if (cModels == 0)
        {
            continue;
        }

        // Code point and model count.
        if (fwprintf(pFile, L"+ %04X %d", (unsigned int)iCode, cModels) < 0)
        {
            return FALSE;
        }

        // Size of each model, in list order.
        for (pScan = apModelHead[iCode]; pScan; pScan = pScan->pNext)
        {
            if (fwprintf(pFile, L" %d", pScan->sizeModel) < 0)
            {
                return FALSE;
            }
        }

        if (fwprintf(pFile, L"\n") < 0)
        {
            return FALSE;
        }
    }

    return TRUE;
}
// Given a data sample and a code point, give the score for each model for
// that code point. This fills in the scores in order of the models in
// the DB. The array must be big enough to hold all the scores. The return
// value give the number of entries filled in.
int HoundMatchCodePoint(wchar_t dchLabel, const BYTE * const pSampleVector, double *pScores)
{
int iModel;
MODEL_ITEM *pScan;
// Process each model in the list.
iModel = 0;
for (pScan = apModelHead[dchLabel]; pScan; pScan = pScan->pNext)
{
HOUND_MATCH houndMatch;
// Set up match structure.
houndMatch.numFeat = g_pHoundSpace->numFeat;
houndMatch.cPairOneByte = g_pHoundSpace->cPairOneByte;
houndMatch.cPairTable = g_pHoundSpace->cPairTable;
houndMatch.pPairOneByte = (DEPEND_PAIR *)g_pHoundSpace->modelData;
houndMatch.pPairTwoByte = houndMatch.pPairOneByte + g_pHoundSpace->cPairOneByte;
houndMatch.cWeightOneByte = g_pHoundSpace->cWeightOneByte;
houndMatch.cWeightTable = g_pHoundSpace->cWeightTable;
houndMatch.pWeightOneByte = (DEPEND_WEIGHT *)(houndMatch.pPairOneByte + g_pHoundSpace->cPairTable);
houndMatch.pWeightTwoByte = houndMatch.pWeightOneByte + g_pHoundSpace->cWeightOneByte;
houndMatch.pScanData = pScan->pModel + 2; // Skip label
// Score the model.
pScores[iModel] = lLogDensityMixture(pSampleVector, &houndMatch);;
// Finished this model.
++iModel;
}
return iModel;
}
// Copy one of the loaded models to an output file.
//
// pFile    - stream the raw model bytes (label included) are written to.
// dchLabel - code point whose model list is searched.
// iModel   - zero-based position of the model within that list.
//
// Returns the size in bytes of the model written, 0 if the code point has no
// model at that position (including any negative iModel), or -1 if the write
// fails.
int
HoundCopyModelToFile(FILE *pFile, wchar_t dchLabel, int iModel)
{
    int iSkip;
    MODEL_ITEM *pModelItem;

    // A negative index cannot name a model. Without this check the walk
    // below would not advance at all and the first model would be written.
    if (iModel < 0)
    {
        return 0;
    }

    // Find model: step forward iModel entries, stopping early if the list
    // runs out.
    pModelItem = apModelHead[dchLabel];
    for (iSkip = 0; pModelItem && iSkip < iModel; ++iSkip)
    {
        pModelItem = pModelItem->pNext;
    }

    // Make sure model exists.
    if (!pModelItem)
    {
        return 0;
    }

    // Write it out as a single item so a short write shows up as a count of 0.
    if (fwrite(pModelItem->pModel, pModelItem->sizeModel, 1, pFile) < 1)
    {
        return -1;
    }

    return pModelItem->sizeModel;
}