346 lines
14 KiB
C
346 lines
14 KiB
C
//$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
|
//
|
|
// Copyright (c) 2001 Microsoft Corporation. All rights reserved.
|
|
//
|
|
// Module:
|
|
// volcano/inc/lattice.h
|
|
//
|
|
// Description:
|
|
// Holds the internal structures related to the lattice for Volcano,
|
|
// as well as the functions used to manipulate the lattice.
|
|
//
|
|
// Author:
|
|
// hrowley
|
|
//
|
|
//$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
|
|
|
#pragma once
|
|
|
|
// Uncomment this (or put it in the project settings) to enable tuning code.
|
|
// The tuning code writes out data to the file c:\tune.log when the SearchForTargetResult()
|
|
// API function is called, saving the components of the score for the best path and all
|
|
// alternate paths through the lattice. This allows a simple program to adjust the linear
|
|
// weightings of these components to maximize the number of times the correct path is chosen.
|
|
//#define HWX_TUNE
|
|
|
|
//#define USE_IFELANG3_BIGRAMS
|
|
|
|
#include <windows.h>
|
|
|
|
#include "vtune.h"
|
|
#include "charrec.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
// References to the various databases that get loaded
|
|
// Eventually this stuff, along with a few other globals, should
|
|
// be placed in a structure so we can have multiple languages
|
|
// active at once.
|
|
extern BBOX_PROB_TABLE *g_pProbTable;
|
|
extern UNIGRAM_INFO g_unigramInfo;
|
|
extern BIGRAM_INFO g_bigramInfo;
|
|
extern CLASS_BIGRAM_INFO g_classBigramInfo;
|
|
extern TTUNE_INFO g_ttuneInfo;
|
|
extern wchar_t g_pLocale[16];
|
|
extern wchar_t g_pLocaleDir[1024];
|
|
extern wchar_t g_pRecogDir[1024];
|
|
extern HINSTANCE g_hDLL;
|
|
extern CENTIPEDE_INFO g_centipedeInfo;
|
|
extern VOLCANO_PARAMS_INFO g_vtuneInfo; // Tuning parameters
|
|
extern VOLCANO_CONFIG g_latticeConfigInfo; // Configuration data
|
|
extern JAWS_LOAD_INFO g_JawsLoadInfo;
|
|
extern FUGU_LOAD_INFO g_FuguLoadInfo;
|
|
extern SOLE_LOAD_INFO g_SoleLoadInfo;
|
|
extern BOOL g_fUseJaws;
|
|
extern BOOL g_fUseZillaHound;
|
|
|
|
// Initialize the configuration info.
// Declared with an explicit (void) parameter list so that C compilers
// treat this as a real prototype and reject calls that pass arguments.
void LatticeConfigInit(void);
|
|
|
|
/////////////////////////////////////////////////////////////////////////////////////
|
|
// References to legacy baseline/height and language model, currently in lattice.c //
|
|
/////////////////////////////////////////////////////////////////////////////////////
|
|
// Size and line-position information for a writing box, used by the
// legacy baseline/height cost functions.
typedef struct tagBOXINFO
{
    int size;       // Absolute size.
    int xheight;    // Absolute height to midline.
    int baseline;   // Baseline in tablet coordinates.
    int midline;    // Midline in tablet coordinates.
} BOXINFO;
|
|
|
|
// Legacy baseline/height cost functions.
//
// The *TransitionCost functions take a previous character (symPrev, its
// rectangle rPrev and box info biPrev) followed by a current character
// (sym, r, bi). The *BoxCost functions take a single character.
// NOTE(review): the scale and sign convention of the returned FLOAT are
// not documented in this header -- confirm against the implementation.
FLOAT BaselineTransitionCost(SYM symPrev, RECT rPrev, BOXINFO *biPrev,
                             SYM sym, RECT r, BOXINFO *bi);
FLOAT HeightTransitionCost(SYM symPrev, RECT rPrev, BOXINFO *biPrev,
                           SYM sym, RECT r, BOXINFO *bi);
FLOAT HeightBoxCost(SYM sym, RECT r, BOXINFO *bi);
FLOAT BaselineBoxCost(SYM sym, RECT r, BOXINFO *bi);
|
|
|
|
////////////////////////////
|
|
// Public data structures //
|
|
////////////////////////////
|
|
|
|
// One character on a path through the lattice.
typedef struct tagLATTICE_PATH_ELEMENT {
    wchar_t wChar;      // Unicode character
    int     iStroke;    // With iAlt: information to look up the character in the lattice
    int     iAlt;       // With iStroke: information to look up the character in the lattice
    int     nStrokes;   // NOTE(review): presumably the stroke count of this character -- confirm
    int     iBoxNum;    // Box number
    // Disabled members kept from the original:
    // FLOAT score;     // Raw zilla/otter score
    // RECT bbox;       // Bounding box of the character
} LATTICE_PATH_ELEMENT;
|
|
|
|
// This structure is returned by GetCurrentPath.
|
|
typedef struct tagLATTICE_PATH {
|
|
int nChars; // Number of character in current path
|
|
LATTICE_PATH_ELEMENT *pElem; // The characters themselves
|
|
} LATTICE_PATH;
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Data structures, which are for the most part internal to the module. //
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
// Forward declaration so the lattice type can be named before the full
// structure definition later in this header.
typedef struct tagLATTICE LATTICE;

// Alias for Log2Range, which is defined outside this header.
#define MinLogProb              Log2Range
// Capacity limits used to size the per-stroke alternate arrays below.
#define MaxStrokesPerCharacter  32
#define MaxAltsPerStroke        20

// Scale factor for IFELang3. Earlier values tried were -1024 and -10000.
// The expansion calls log(), so <math.h> must be in scope wherever this
// macro is used.
//#define SCALE_FOR_IFELANG3 -1024
//#define SCALE_FOR_IFELANG3 -10000
#define SCALE_FOR_IFELANG3      (-1024.0*log(2.0))
|
|
|
|
// Cache entry for recognition results
|
|
typedef struct CACHE_ENTRY
|
|
{
|
|
int nStrokes; // Number of strokes for this character
|
|
int iRecognizer; // Which recognizer gave an answer
|
|
ALT_LIST alts; // The results
|
|
struct CACHE_ENTRY *pNext; // Pointer to the next cache entry
|
|
} CACHE_ENTRY;
|
|
|
|
// Cache for recognition results
|
|
typedef struct CACHE
|
|
{
|
|
int nStrokes; // How many strokes we have allocated space for
|
|
CACHE_ENTRY **pStrokes; // Pointers to the cache entries for each stroke
|
|
} CACHE;
|
|
|
|
// Create a recognizer cache, returned as an opaque pointer.
// NOTE(review): the failure return value is not documented here --
// presumably NULL; confirm against the implementation.
void *AllocateRecognizerCache(void);

// Release a cache previously obtained from AllocateRecognizerCache.
void FreeRecognizerCache(void *pvCache);
|
|
// Look up cached results in pvCache for the given iStroke/nStrokes pair.
// piRecognizer is an output parameter.
// NOTE(review): presumably returns NULL on a cache miss and stores the
// recognizer id through piRecognizer on a hit -- confirm.
ALT_LIST *LookupRecognizerCache(void *pvCache, int iStroke, int nStrokes,
                                int *piRecognizer);

// Add the results in pAlts, produced by recognizer iRecognizer, to pvCache
// for the given iStroke/nStrokes pair.
void AddRecognizerCache(void *pvCache, int iStroke, int nStrokes,
                        int iRecognizer, ALT_LIST *pAlts);
|
|
|
|
typedef struct tagLATTICE_ELEMENT {
|
|
BOOL fUsed; // Whether this alternate is in use
|
|
float logProb; // Score for just this alternate, without language model
|
|
float logProbPath; // Score for path to this point, including language model
|
|
int iCharDetectorScore; // score coming from char detector
|
|
int iPathLength; // How many characters are on the best path to this element
|
|
int nStrokes; // Number of strokes in this character
|
|
int iPrevAlt; // Index of previous character in the appropriate column
|
|
int nPrevStrokes; // Number of strokes in the previous character
|
|
wchar_t wChar; // Dense code of this character
|
|
wchar_t wPrevChar; // Dense code of previous character
|
|
RECT bbox; // Bounds of the ink
|
|
RECT writingBox; // Estimated writing box
|
|
int space;
|
|
int area;
|
|
BOOL fCurrentPath; // Whether this alternate is on the best path
|
|
FLOAT score;
|
|
int maxDist;
|
|
int nHits; // How many times this element has occurred on paths
|
|
int iPromptPtr; // How far along we are in the prompt string.
|
|
// Eventually this field could represent the current state
|
|
// of the DFA implementing a factoid.
|
|
#ifdef USE_IFELANG3_BIGRAMS
|
|
int indexIFELang3;
|
|
int nBigrams;
|
|
float bigramLogProbs[MaxAltsPerStroke];
|
|
int bigramAlts[MaxAltsPerStroke];
|
|
#endif
|
|
#ifdef HWX_TUNE
|
|
VOLCANO_WEIGHTS tuneScores; // Store the components of the score for this alternate
|
|
#endif
|
|
} LATTICE_ELEMENT;
|
|
|
|
typedef struct tagLATTICE_ALT_LIST {
|
|
int iBrkNetScore; // is this a hard break point
|
|
BOOL fSpaceAfterStroke; // is there a hard space after this stroke
|
|
int nUsed; // Number of alternates used in this column
|
|
LATTICE_ELEMENT alts[MaxAltsPerStroke]; // Column of alternates
|
|
} LATTICE_ALT_LIST;
|
|
|
|
typedef struct tagSCORES_ALT_LIST {
|
|
VOLCANO_WEIGHTS alts[MaxAltsPerStroke];
|
|
} SCORES_ALT_LIST;
|
|
|
|
typedef struct tagLATTICE {
|
|
RECOG_SETTINGS recogSettings; // Other settings (ALCs, abort flag)
|
|
BOOL fWordMode; // "Word" mode, in which free mode treats all the ink as one char
|
|
BOOL fCoerceMode; // ALCPriority -> ALCValid
|
|
BOOL fSingleSeg; // Return only a single segmentation
|
|
BOOL fUseFactoid; // Whether to use factoid settings or not
|
|
ALC alcFactoid; // ALC from factoid settings
|
|
BYTE *pbFactoidChars; // Bitmask of chars from factoid
|
|
BOOL fSepMode; // "Separate" mode, disables language model
|
|
wchar_t *wszAnswer; // Used during tuning and training, to tell the recognizer in advance
|
|
// what the correct answer is. Also used by separator
|
|
BOOL fUseGuide; // Whether or not to use the guide
|
|
HWXGUIDE guide; // The guide
|
|
int nStrokes; // How many strokes (after merging) are in the pStroke array
|
|
int nStrokesAllocated; // How much space is allocated in the pStroke array
|
|
int nRealStrokes; // How many strokes have been added to the lattice (before merging)
|
|
STROKE *pStroke; // The array of strokes
|
|
LATTICE_ALT_LIST *pAltList; // Array of lattice columns
|
|
wchar_t *wszBefore; // Pre-context (in reverse order, the first character is the one just before the ink)
|
|
wchar_t *wszAfter; // Post-context (in normal order, the first character is the one just after the ink)
|
|
BOOL fProbMode; // Whether the score associated with alt is a log prob or a score
|
|
BOOL fUseIFELang3; // Whether to use IFELang3 (available, and enough characters to be useful)
|
|
int nProcessed; // How many strokes have been processed so far
|
|
BOOL fEndInput; // Whether we have reached the end of the input
|
|
BOOL fIncremental; // Whether we're doing processing incrementally
|
|
int nFixedResult; // The number of strokes for which results have already been returned, and
|
|
// whose interpretation cannot change.
|
|
void *pvCache; // Cache for recognition results, or NULL if unused
|
|
BOOL fUseLM; // Whether to use the language model
|
|
} LATTICE;
|
|
|
|
#ifdef HWX_TUNE
// Tuning-only globals; they are declared only when HWX_TUNE is defined.
// FILE comes from <stdio.h>, which must be in scope before this point.
extern FILE *g_pTuneFile;
extern int   g_iTuneMode;
#endif
|
|
|
|
///////////////////
|
|
// API Functions //
|
|
///////////////////
|
|
|
|
// Get the number of strokes which have been added to the lattice
|
|
int GetLatticeStrokeCount(LATTICE *lat);
|
|
|
|
// Create an empty lattice data structure. Returns NULL if it fails to allocate memory.
|
|
LATTICE *AllocateLattice();
|
|
|
|
// Create a new lattice with the same settings as the given lattice.
|
|
LATTICE *CreateCompatibleLattice(LATTICE *lat);
|
|
|
|
// Set the ALC values for the underlying boxed recognizer
|
|
void SetLatticeALCValid(LATTICE *lat, ALC alcValid);
|
|
void SetLatticeALCPriority(LATTICE *lat, ALC alcPriority);
|
|
|
|
// Set the guide for the lattice (which will switch things to boxed mode)
|
|
void SetLatticeGuide(LATTICE *lat, HWXGUIDE *pGuide);
|
|
|
|
// Destroy lattice data structure.
|
|
void FreeLattice(LATTICE *lat);
|
|
|
|
// Add a stroke to the lattice, returns TRUE if it succeeds.
|
|
BOOL AddStrokeToLattice(LATTICE *lat, int nInk, POINT *pts, DWORD time);
|
|
|
|
// Update the probabilities in the lattice, including setting current
|
|
// path to the most likely path so far (not including language model).
|
|
// Can be called repeatedly for incremental processing after each stroke.
|
|
BOOL ProcessLattice(LATTICE *lat, BOOL fEndInput);
|
|
|
|
BOOL ProcessLatticeRange(LATTICE *lat, int iStrtStroke, int iEndStroke);
|
|
|
|
// Return the current path in a LATTICE_PATH structure, which contains
|
|
// arrays of the bounding boxes for each character, the stroke counts,
|
|
// and the characters themselves.
|
|
// When called after ProcessLattice, returns the highest probability path.
|
|
// The memory for the path should be freed by the caller.
|
|
BOOL GetCurrentPath(LATTICE *lat, LATTICE_PATH **pPath);
|
|
|
|
// Free a LATTICE_PATH structure returned by GetCurrentPath()
|
|
void FreeLatticePath(LATTICE_PATH *path);
|
|
|
|
// Given a lattice and a path through it, for characters iStartChar through iEndChar
|
|
// inclusive, return the time stamps of the first and last strokes in those characters.
|
|
// Returns FALSE if there are no strokes associated with the characters (eg, spaces)
|
|
BOOL GetCharacterTimeRange(LATTICE *lat, LATTICE_PATH *path, int iStartChar, int iEndChar,
|
|
DWORD *piStartTime, DWORD *piEndTime);
|
|
|
|
// Given a character number in the current path (counting from zero),
|
|
// and the number of alternates to return, it returns alternates for
|
|
// the character which contain the same number of strokes. This means
|
|
// alternate segmentations cannot be returned. The number of alternates
|
|
// actually stored in the array are returned.
|
|
int GetAlternatesForCharacterInCurrentPath(LATTICE *lat, LATTICE_PATH *path, int iChar, int nAlts, wchar_t *pwAlts);
|
|
|
|
// Apply language model to produce a better current path. Currently only
|
|
// runs IFELang3 if available, and asserts otherwise. This will be changed
|
|
// to use the existing unigram/bigram stuff later.
|
|
void ApplyLanguageModel(LATTICE *lat, wchar_t *wszCorrectAnswer);
|
|
|
|
// Load some global tables (the locale table, unigrams, bigrams, and class bigrams)
|
|
// The tables needed by the segmenter are not loaded until needed, this will
|
|
// probably change later. The path is the same format as taken by the file loading
|
|
// functions. This interface will probably change when I read the code in hwx.c
|
|
// to see how it should be done. :)
|
|
BOOL LatticeConfigFile(wchar_t *pRecogDir);
|
|
|
|
// Unload any global tables loaded earlier by LatticeConfig
|
|
BOOL LatticeUnconfigFile();
|
|
|
|
// Load some global tables (the locale table, unigrams, bigrams, and class bigrams)
|
|
// The tables needed by the segmenter are not loaded until needed, this will
|
|
// probably change later. The path is the same format as taken by the file loading
|
|
// functions. This interface will probably change when I read the code in hwx.c
|
|
// to see how it should be done. :)
|
|
BOOL LatticeConfig(HINSTANCE hInst);
|
|
|
|
// Unload any global tables loaded earlier by LatticeConfig
|
|
BOOL LatticeUnconfig();
|
|
|
|
BOOL GetBoxOfAlternateInCurrentPath(LATTICE *pLattice, LATTICE_PATH *path, int iChar, RECT *pRect);
|
|
|
|
///////////////
|
|
// Call flow //
|
|
///////////////
|
|
|
|
// 0. LatticeConfig
|
|
// 1. AllocateLattice
|
|
// 2. For each stroke
|
|
// a. AddStrokeToLattice
|
|
// b. ProcessLattice (this is optional, for incremental processing)
|
|
// c. GetCurrentPath (this is optional, for incremental results)
|
|
// 3. ProcessLattice
|
|
// 4. ApplyLanguageModel (optional, probably not useful at the moment)
|
|
// 5. GetCurrentPath
|
|
// 6. GetAlternatesForCharacterInCurrentPath
|
|
// 7. FreeLatticePath
|
|
// 8. FreeLattice
|
|
// 9. LatticeUnconfig
|
|
|
|
int SearchForTargetResultInternal(LATTICE *lat, wchar_t *wsz);
|
|
|
|
RECT GetAlternateBbox(LATTICE *lat, int iStroke, int iAlt);
|
|
FLOAT GetAlternateRecogScore(LATTICE *lat, int iStroke, int iAlt);
|
|
FLOAT GetAlternateScore(LATTICE *lat, int iStroke, int iAlt);
|
|
FLOAT GetAlternatePathScore(LATTICE *lat, int iStroke, int iAlt);
|
|
int GetAlternateStrokes(LATTICE *lat, int iStroke, int iAlt);
|
|
wchar_t GetAlternateChar(LATTICE *lat, int iStroke, int iAlt);
|
|
|
|
/////////////////////
|
|
// Internal stuff. //
|
|
/////////////////////
|
|
|
|
void BuildStrokeCountRecogAlts(LATTICE *lat, int iStroke, int cStrk);
|
|
int FindFullPath(LATTICE *lat);
|
|
void ClearAltList(LATTICE_ALT_LIST *list);
|
|
void FixupBackPointers (LATTICE *pLat);
|
|
|
|
BOOL LatticeConfigIFELang3();
|
|
BOOL LatticeIFELang3Available();
|
|
BOOL LatticeUnconfigIFELang3();
|
|
|
|
// Flags used to check for valid paths in the lattice.
#define LCF_UNKNOWN 0
#define LCF_INVALID 1
#define LCF_VALID   2
|
|
|
|
// Figure out which strokes are valid end of character in paths that span the
|
|
// whole lattice.
|
|
BOOL CheckIfValid(LATTICE *lat, int iStartStroke, int iStroke, BYTE *pValidEnd);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|