860 lines
51 KiB
C++
860 lines
51 KiB
C++
/******************************************************************************
* MiscData.cpp *
*--------------*
*  This file stores the const data used in various non-normalization front-end
*  code
*------------------------------------------------------------------------------
*  Copyright (C) 1999 Microsoft Corporation         Date: 05/02/2000
*  All Rights Reserved
*
****************************************************************** AARONHAL ***/
|
|
|
|
#include "stdafx.h"
|
|
#include"stdsentenum.h"
|
|
|
|
//--- Variable used to make sure initialization of pronunciations only happens once!
|
|
//--- Read by CleanupAbbrevTables(), which frees the table pronunciations only when this
//    flag is set. The code that sets the flag is not part of this section of the file.
BOOL g_fAbbrevTablesInitialized = false;
|
|
|
|
//--- Helper function used to delete dynamically allocated memory (for the abbreviation
|
|
// table) at DLL exit time...
|
|
void CleanupAbbrevTables( void )
|
|
{
|
|
if ( g_fAbbrevTablesInitialized )
|
|
{
|
|
for ( ULONG i = 0; i < sp_countof( g_AbbreviationTable ); i++ )
|
|
{
|
|
if ( g_AbbreviationTable[i].pPron1 )
|
|
{
|
|
delete [] g_AbbreviationTable[i].pPron1;
|
|
}
|
|
if ( g_AbbreviationTable[i].pPron2 )
|
|
{
|
|
delete [] g_AbbreviationTable[i].pPron2;
|
|
}
|
|
if ( g_AbbreviationTable[i].pPron3 )
|
|
{
|
|
delete [] g_AbbreviationTable[i].pPron3;
|
|
}
|
|
}
|
|
for ( i = 0; i < sp_countof( g_AmbiguousWordTable ); i++ )
|
|
{
|
|
if ( g_AmbiguousWordTable[i].pPron1 )
|
|
{
|
|
delete [] g_AmbiguousWordTable[i].pPron1;
|
|
}
|
|
if ( g_AmbiguousWordTable[i].pPron2 )
|
|
{
|
|
delete [] g_AmbiguousWordTable[i].pPron2;
|
|
}
|
|
if ( g_AmbiguousWordTable[i].pPron3 )
|
|
{
|
|
delete [] g_AmbiguousWordTable[i].pPron3;
|
|
}
|
|
}
|
|
for ( i = 0; i < sp_countof( g_PostLexLookupWordTable ); i++ )
|
|
{
|
|
if ( g_PostLexLookupWordTable[i].pPron1 )
|
|
{
|
|
delete [] g_PostLexLookupWordTable[i].pPron1;
|
|
}
|
|
if ( g_PostLexLookupWordTable[i].pPron2 )
|
|
{
|
|
delete [] g_PostLexLookupWordTable[i].pPron2;
|
|
}
|
|
if ( g_PostLexLookupWordTable[i].pPron3 )
|
|
{
|
|
delete [] g_PostLexLookupWordTable[i].pPron3;
|
|
}
|
|
}
|
|
if ( g_pOfA )
|
|
{
|
|
delete [] g_pOfA;
|
|
}
|
|
if ( g_pOfAn )
|
|
{
|
|
delete [] g_pOfAn;
|
|
}
|
|
}
|
|
}
|
|
|
|
//--- Part-of-speech tagger patch list.
//    Each BrillPatch initializer supplies seven values. Judging from the hand-inserted
//    final rule (Det -> Noun for the word "a" when the previous tag is Det), the columns
//    appear to be:
//        tag to replace, replacement tag, context template,
//        first context tag, second context tag, first context word, second context word
//    NOTE(review): this column reading is inferred from the data only - confirm it against
//    the BrillPatch declaration.
//    NOTE(review): several rules occur more than once (for example Noun -> Verb / PREV1T /
//    VAux). The order and the repeats are reproduced exactly; presumably the rules are
//    applied in sequence, so do not sort or de-duplicate without confirming.
const BrillPatch g_POSTaggerPatches [] =
{
    { MS_Adj,  MS_Noun, CAP,          MS_Unknown,  MS_Unknown, NULL, NULL },
    { MS_Verb, MS_Noun, PREV1T,       MS_Adj,      MS_Unknown, NULL, NULL },
    { MS_Verb, MS_Noun, CAP,          MS_Unknown,  MS_Unknown, NULL, NULL },
    { MS_Noun, MS_Verb, PREV1T,       MS_VAux,     MS_Unknown, NULL, NULL },
    { MS_Conj, MS_Adv,  NEXT2T,       MS_Conj,     MS_Unknown, NULL, NULL },
    { MS_Adj,  MS_Adv,  NEXT1T,       MS_Verb,     MS_Unknown, NULL, NULL },
    { MS_Adj,  MS_Adv,  PREV1TNEXT1T, MS_Noun,     MS_Unknown, NULL, NULL },
    { MS_Verb, MS_Noun, PREV2T,       MS_Prep,     MS_Unknown, NULL, NULL },
    { MS_Noun, MS_Verb, PREV1T,       MS_SubjPron, MS_Unknown, NULL, NULL },
    { MS_Noun, MS_Verb, PREV1T,       MS_Pron,     MS_Unknown, NULL, NULL },
    { MS_Noun, MS_Verb, PREV1T,       MS_Adv,      MS_Unknown, NULL, NULL },
    { MS_Verb, MS_Noun, NEXT1T,       MS_VAux,     MS_Unknown, NULL, NULL },
    { MS_Adj,  MS_Adv,  PREV1TNEXT1T, MS_Verb,     MS_Adj,     NULL, NULL },
    { MS_Verb, MS_Noun, PREV1TNEXT1T, MS_Noun,     MS_Verb,    NULL, NULL },
    { MS_Noun, MS_Adj,  NEXT1T,       MS_Adj,      MS_Unknown, NULL, NULL },
    { MS_Verb, MS_Noun, NEXT1T,       MS_CConj,    MS_Unknown, NULL, NULL },
    { MS_Adj,  MS_Verb, PREV1T,       MS_VAux,     MS_Unknown, NULL, NULL },
    { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Prep,     MS_Adv,     NULL, NULL },
    { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Prep,     MS_Adj,     NULL, NULL },
    { MS_Adv,  MS_Prep, PREV1TNEXT1T, MS_Noun,     MS_Prep,    NULL, NULL },
    { MS_Adv,  MS_Prep, PREV1TNEXT2T, MS_Verb,     MS_Noun,    NULL, NULL },
    { MS_Adj,  MS_Adv,  NEXT1T,       MS_Adv,      MS_Unknown, NULL, NULL },
    { MS_Noun, MS_Verb, NEXT1T,       MS_ObjPron,  MS_Unknown, NULL, NULL },
    { MS_Adv,  MS_Adj,  PREV1TNEXT1T, MS_Prep,     MS_Noun,    NULL, NULL },
    { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Interr,   MS_Prep,    NULL, NULL },
    { MS_Adj,  MS_Adv,  PREV1TNEXT1T, MS_Adv,      MS_Adj,     NULL, NULL },
    { MS_Prep, MS_Verb, PREV1T,       MS_VAux,     MS_Unknown, NULL, NULL },
    { MS_Adv,  MS_Conj, NEXT1T,       MS_SubjPron, MS_Unknown, NULL, NULL },
    { MS_Adj,  MS_Noun, PREV1TNEXT1T, MS_Adj,      MS_Prep,    NULL, NULL },
    { MS_Noun, MS_Verb, PREV1TNEXT2T, MS_Interr,   MS_Noun,    NULL, NULL },
    { MS_Noun, MS_Verb, PREV1T,       MS_VAux,     MS_Unknown, NULL, NULL },
    { MS_Verb, MS_Noun, PREV1TNEXT1T, MS_Unknown,  MS_Verb,    NULL, NULL },
    { MS_Adj,  MS_Verb, PREV1TNEXT1T, MS_Verb,     MS_Adj,     NULL, NULL },
    { MS_Conj, MS_Adv,  PREV1TNEXT1T, MS_Unknown,  MS_Unknown, NULL, NULL },
    { MS_Adv,  MS_Prep, PREV1TNEXT2T, MS_Prep,     MS_Unknown, NULL, NULL },
    { MS_Adj,  MS_Adv,  PREV2TNEXT1T, MS_Verb,     MS_Adj,     NULL, NULL },
    { MS_Adv,  MS_Prep, NEXT1T,       MS_Noun,     MS_Unknown, NULL, NULL },
    { MS_Verb, MS_Noun, PREV1TNEXT1T, MS_Conj,     MS_Verb,    NULL, NULL },
    { MS_Adj,  MS_Noun, PREV1TNEXT1T, MS_Prep,     MS_Prep,    NULL, NULL },
    { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Prep,     MS_Pron,    NULL, NULL },
    { MS_Adj,  MS_Noun, PREV1TNEXT1T, MS_Noun,     MS_Verb,    NULL, NULL },
    { MS_Adj,  MS_Adv,  PREV1T,       MS_VAux,     MS_Unknown, NULL, NULL },
    { MS_Adj,  MS_Noun, NEXT1T,       MS_VAux,     MS_Unknown, NULL, NULL },
    { MS_Adj,  MS_Adv,  NEXT1T,       MS_VAux,     MS_Unknown, NULL, NULL },
    { MS_Adj,  MS_Noun, PREV1TNEXT1T, MS_Unknown,  MS_Prep,    NULL, NULL },
    { MS_Verb, MS_Noun, PREV2TNEXT1T, MS_Unknown,  MS_Verb,    NULL, NULL },
    { MS_Noun, MS_Verb, PREV1T,       MS_SubjPron, MS_Unknown, NULL, NULL },
    { MS_Adj,  MS_Verb, PREV1TNEXT1T, MS_Prep,     MS_Adv,     NULL, NULL },
    { MS_Adv,  MS_Adj,  PREV1TNEXT1T, MS_Conj,     MS_Noun,    NULL, NULL },
    { MS_Conj, MS_Adv,  PREV1TNEXT1T, MS_Prep,     MS_Adj,     NULL, NULL },
    { MS_Adv,  MS_Prep, NEXT1T,       MS_Verb,     MS_Unknown, NULL, NULL },
    { MS_Noun, MS_Adj,  NEXT1T,       MS_Adj,      MS_Unknown, NULL, NULL },
    { MS_Adv,  MS_Conj, NEXT1T,       MS_Pron,     MS_Unknown, NULL, NULL },
    { MS_Conj, MS_Adv,  PREV1T,       MS_VAux,     MS_Unknown, NULL, NULL },
    { MS_Noun, MS_Verb, PREV1T,       MS_Adv,      MS_Unknown, NULL, NULL },
    { MS_Verb, MS_Noun, PREV2TNEXT1T, MS_CConj,    MS_Unknown, NULL, NULL },
    { MS_Noun, MS_Verb, PREV1T,       MS_VAux,     MS_Unknown, NULL, NULL },
    { MS_Noun, MS_Verb, PREV1T,       MS_Pron,     MS_Unknown, NULL, NULL },
    { MS_Adv,  MS_Adj,  PREV2TNEXT1T, MS_Prep,     MS_Noun,    NULL, NULL },
    { MS_Noun, MS_Adj,  PREV1TNEXT1T, MS_Adv,      MS_Noun,    NULL, NULL },
    { MS_Conj, MS_Adv,  PREV1TNEXT1T, MS_Adv,      MS_Adj,     NULL, NULL },

    //--- Inserted by hand - convert "A" from Det to Noun after Det...
    { MS_Det,  MS_Noun, CURRWPREV1T,  MS_Det,      MS_Unknown, L"a", NULL },
};
|
|
|
|
//--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!!
|
|
AbbrevRecord g_AbbreviationTable[] =
|
|
{
|
|
{ L"a", L"EY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"amp", L"AE 1 M P", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"app", L"AE 1 P", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"apr", L"EY 1 - P R AX L", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"apt", L"ax - p aa 1 r t - m ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"assoc", L"ax - s ow 2 - s iy - ey 1 - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"asst", L"ax - s ih 1 s - t ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"aug", L"ao 1 - g ax s t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"ave", L"ae 1 v - ax - n uw 2", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"b", L"B IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
{ L"bldg", L"b ih 1 l - d ih ng", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"blvd", L"b uh 1 l - ax - v aa 2 r d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"bu", L"b uh 1 sh - ax l", MS_Noun, L"b uh 1 sh - ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- Initial - never EOS
|
|
{ L"c", L"s eh 1 l - s iy - ax s", MS_Noun, L"s iy 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"cal", L"k ae 1 l - ax - r iy", MS_Noun, L"k ae 1 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
|
|
{ L"cals", L"k ae 1 l - ax - r iy Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"ch", L"ch ae 1 p - t er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"cl", L"s eh 1 n - t ax - l iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"cm", L"s eh 1 n - t ax - m iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"cms", L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"cntr", L"s eh 1 n - t er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"co", L"k ah 1 m - p ax - n iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
{ L"cont", L"k ax n - t ih 1 n - y uw D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"corp", L"k ao 2 r - p ax - r ey 1 - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"ct", L"k ao 1 r t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
{ L"ctr", L"s eh 1 n - t er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"cu", L"k y uw 1 - b ih k", MS_Noun, L"k y uw 1 - b ih k", MS_Noun, NULL, MS_Unknown, 1, 4 },
|
|
//--- Initial - never EOS
|
|
{ L"d", L"D IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"db", L"d eh 1 s - ax - b ax l", MS_Noun, L"d eh 1 s - ax - b ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"dec", L"d ih - s eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"deg", L"d ih - g r iy 1", MS_Noun, L"d ih - g r iy 1 z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"dept", L"d ih - p aa 1 r t - m ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"dist", L"d ih 1 s - t r ax k t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"doc", L"D AA 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"doz", L"d ah 1 z - ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- DoctorDriveAbbreviation
|
|
{ L"dr", L"d aa 1 k - t er", MS_Noun, L"D R AY 1 V", MS_Noun, NULL, MS_Unknown, 0, 1 },
|
|
//--- Initial - never EOS
|
|
{ L"e", L"IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"ed", L"eh 2 jh - ax - k ey 1 - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
{ L"esq", L"eh 1 s - k w ay 2 r", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"est", L"ax - s t ae 1 b - l ax sh T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"etc", L"EH T & s eh 1 t - er - ax", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"ex", L"ih g - z ae 1 m - p ax l", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
{ L"ext", L"ih k - s t eh 1 n - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"f", L"f ae 1 r - ax n - h ay 2 t", MS_Noun, L"eh 1 f", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
{ L"feb", L"f eh 1 b - r uw - eh 2 r - iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"fig", L"f ih 1 g - y er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"figs", L"f ih 1 g - y er Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"fl", L"f l uw 1 - ih d", MS_Noun, L"f l uw 1 - ih d", MS_Noun, NULL, MS_Unknown, 1, 4 },
|
|
{ L"fn", L"f uh 1 t - n ow 2 t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"freq", L"f r iy 1 - k w ax n - s iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"fri", L"f r ay 1 - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"ft", L"F UH 1 T", MS_Noun, L"F IY 1 T", MS_Noun, L"F AO 1 R T", MS_Noun, 0, 0 },
|
|
{ L"fwd", L"f ao 1 r - w er d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
|
|
{ L"g", L"G R AE 1 M", MS_Noun, L"G R AE 1 M Z", MS_Noun, L"JH IY 1", MS_Noun, 1, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"gal", L"g ae 1 l - ax n", MS_Noun, L"g ae 1 l - ax n Z", MS_Noun, L"G AE 1 L", MS_Noun, 0, 0 },
|
|
//--- TITLE - never EOS
|
|
{ L"gen", L"jh eh 1 n - er - ax l", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"gov", L"g ah 1 v - er - n ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"h", L"EY 1 CH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"hr", L"AW 1 ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
{ L"hrs", L"AW 1 ER Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"ht", L"H AY 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"hwy", L"h ay 1 w ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"hz", L"H ER 1 T S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"i", L"AY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
|
|
{ L"in", L"IH 1 N CH", MS_Noun, L"IH 1 N CH AX Z", MS_Noun, L"IH 1 N", MS_Prep, 1, 0 },
|
|
{ L"inc", L"ih n - k ao 1 r - p ax - r ey 2 - t AX D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"incl", L"ih n - k l uw 1 - d AX D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"intl", L"ih 2 n - t er - n ae 1 sh - ax - n ax l", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- IfEOSAndLowercaseNotAbbreviation
|
|
{ L"is", L"ay 1 - l ax n d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 2, -1 },
|
|
//--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
|
|
{ L"j", L"JH UW 1 L", MS_Noun, L"JH UW 1 L Z", MS_Noun, L"JH EY 1", MS_Noun, 1, 0 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"jan", L"jh ae 1 n - y uw - eh 2 r - iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
{ L"jr", L"jh uw 1 n - y er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"jul", L"jh uh - l ay 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"jun", L"JH UW 1 N", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"k", L"k eh 1 l - v ax n", MS_Noun, L"k ey 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"kb", L"k ih 1 l - ax - b ay 2 t", MS_Noun, L"k ih 1 l - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"kcal", L"k ih 1 l - ax - k ae 2 l - ax - r iy", MS_Noun, L"k ih 1 l - ax - k ae 2 l - ax - r iy Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"kg", L"k ih 1 l - ax - g r ae 2 m", MS_Noun, L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"kgs", L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"khz", L"k ih 1 l - ax - h er 2 t s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"kj", L"k ih 1 l - ax - jh uw 2 l", MS_Noun, L"k ih 1 l - ax - jh uw 2 l z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"km", L"k ih - l aa 1 m - ih - t er", MS_Noun, L"k ih - l aa 1 m - ih - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"kw", L"k ih 1 l - ax - w aa 2 t", MS_Noun, L"k ih 1 l - ax - w aa 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
|
|
{ L"l", L"l iy 1 - t er", MS_Noun, L"l iy 1 - t er Z", MS_Noun, L"EH 1 L", MS_Noun, 1, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"lb", L"P AW 1 N D", MS_Noun, L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"lbs", L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"lg", L"L AA 1 R JH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"ln", L"l ey 1 n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Title - never EOS
|
|
{ L"lt", L"l uw - t eh 1 n - ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
{ L"ltd", L"l ih 1 m - ih - t AX D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
|
|
{ L"m", L"M IY 1 T ER", MS_Noun, L"M IY 1 T ER Z", MS_Noun, L"EH 1 M", MS_Noun, 1, 0 },
|
|
//--- IfEOSNotAbbreviation???
|
|
{ L"mar", L"M AA 1 R CH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"mb", L"m eh 1 g - ax - b ay 2 t", MS_Noun, L"m eh 1 g - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"mfg", L"m ae 2 n - y ax - f ae 1 k - ch er - IH NG", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"mg", L"m ih 1 l - ax - g r ae 2 m", MS_Noun, L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"mgr", L"m ae 1 n - ih - jh ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"mgs", L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"mhz", L"m eh 1 g - ax - h er 2 t s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"mi", L"M AY 1 L", MS_Noun, L"M AY 1 L Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"mic", L"M AY 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"min", L"m ih 1 n - ax t", MS_Noun, L"m ih 1 n - ax t S", MS_Noun, L"m ih 1 n - ax - m ax m", MS_Noun, 0, 0 },
|
|
{ L"misc", L"m ih 2 s - ax - l ey 1 - n iy - ax s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"ml", L"m ih 1 l - ax - l iy 2 - t er", MS_Noun, L"m ih 1 l - ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"mm", L"m ih 1 l - ax - m iy 2 - t er", MS_Noun, L"m ih 1 l - ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"mon", L"m ah 1 n - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- TITLE - never EOS
|
|
{ L"mr", L"M IH 1 S - T ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- TITLE - never EOS
|
|
{ L"mrs", L"M IH 1 S - AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"ms", L"M IH 1 Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"msec", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"msecs", L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"mt", L"M AW 1 N T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"mtn", L"m aw 1 n - t ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"n", L"EH 1 N", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"no", L"N UH 1 M - B ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
{ L"nov", L"n ow - v eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"o", L"OW 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
{ L"oct", L"aa k - t ow 1 - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"op", L"OW 1 - P AX S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"oz", L"AW 1 N S", MS_Noun, L"AW 1 N - S AX Z", MS_Noun, L"AA 1 Z", MS_Noun, 0, 0 },
|
|
{ L"ozs", L"AW 1 N - S AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- AbbreviationFollowedByDigit
|
|
{ L"p", L"P EY 1 JH", MS_Noun, L"P IY 1", MS_Noun, NULL, MS_Unknown, 0, 2 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"pg", L"P EY 1 JH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
{ L"pgs", L"P EY 1 - JH AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"pkg", L"p ae 1 k - ih jh", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"pkwy", L"p aa 1 r k - w ey 2", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"pl", L"P L EY 1 S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"pp", L"P EY 1 - JH AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- TITLE - never EOS
|
|
{ L"pres", L"p r eh 1 z - ax - d ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- TITLE - never EOS
|
|
{ L"prof", L"p r ax - f eh 1 - s ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
{ L"pt", L"P OY 1 N T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"q", L"K Y UW 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"qt", L"k w ao 1 r t", MS_Noun, L"k w ao 1 r t Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- Initial - never EOS
|
|
{ L"r", L"AA 1 R", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
{ L"rd", L"r ow 1 d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"rec", L"R EH 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"rep", L"r eh 2 p - r ih - z eh 1 n - t ax - t ih v", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"rt", L"R UW 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"rte", L"R UW 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"s", L"EH 1 S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- IfEOSAndLowercaseNotAbbreviation
|
|
{ L"sat", L"s ae 1 t - er - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 2, -1 },
|
|
//--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
|
|
{ L"sec", L"s eh 1 k - ax n d", MS_Noun, L"s eh 1 k - ax n d z", MS_Noun, NULL, MS_Unknown, 1, 0 },
|
|
//--- TITLE - never EOS
|
|
{ L"sen", L"s eh 1 n - ax - t ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
{ L"sep", L"s eh p - t eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"sept", L"s eh p - t eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"sm", L"S M AO 1 L", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"sq", L"S K W EH 1 R", MS_Noun, L"S K W EH 1 R", MS_Noun, NULL, MS_Unknown, 0, 4 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"sr", L"s iy 1 n - y er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
//--- DoctorDriveAbbreviation
|
|
{ L"st", L"S EY 1 N T", MS_Noun, L"S T R IY 1 T", MS_Noun, NULL, MS_Unknown, 0, 1 },
|
|
{ L"ste", L"s w iy 1 t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- IfEOSNotAbbreviation
|
|
{ L"sun", L"s ah 1 n - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"t", L"T IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"tbsp", L"t ey 1 - b ax l - s p uw 2 n", MS_Noun, L"t ey 1 - b ax l - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"tech", L"T EH 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"thu", L"th er 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"thur", L"th er 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"thurs", L"th er 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"tsp", L"t iy 1 - s p uw 2 n", MS_Noun, L"t iy 1 - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"tue", L"t uw 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"tues", L"t uw 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"u", L"Y UW 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
{ L"univ", L"y uw 2 - n ax - v er 1 - s ih - t iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"v", L"V IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
{ L"ver", L"v er 1 - zh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"vers", L"v er 1 - zh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"vol", L"v aa 1 l - y uw m", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"vs", L"v er 1 - s ax s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"w", L"d ah 1 b - ax l - y uw", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- IfEOSAndLowercaseNotAbbreviation
|
|
{ L"wed", L"w eh 1 n z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 2, -1 },
|
|
{ L"wk", L"W IY 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"wt", L"W EY 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
{ L"wy", L"W EY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"x", L"EH 1 K S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- Initial - never EOS
|
|
{ L"y", L"W AY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"yd", L"Y AA 1 R D", MS_Noun, L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
{ L"yds", L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"yr", L"Y IY 1 R", MS_Noun, L"Y IY 1 R Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- Initial - never EOS
|
|
{ L"z", L"Z IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
|
|
};
|
|
|
|
//--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!!
|
|
AbbrevRecord g_AmbiguousWordTable[] =
|
|
{
|
|
//--- ADisambig
|
|
{ L"a", L"EY 1", MS_Noun, L"AX", MS_Det, NULL, MS_Unknown, 0, 8 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"al", L"EY 1 & EH 1 L", MS_Noun, L"AE 1 L", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"apr", L"ey 1 & p iy 1 & aa 1 r", MS_Noun, L"ey 1 - p r ax l", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"as", L"EY 1 & EH 1 S", MS_Noun, L"AE 1 Z", MS_Conj, NULL, MS_Unknown, 0, 3 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"bu", L"b uh 1 sh - ax l", MS_Noun, L"b uh 1 sh - ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- DegreeAbbreviation
|
|
{ L"c", L"s eh 1 l - s iy - ax s", MS_Noun, L"s iy 1", MS_Noun, NULL, MS_Unknown, 0, 6 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"cal", L"k ae 1 l - ax - r iy", MS_Noun, L"k ae 1 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"cl", L"s eh 1 n - t ax - l iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"cm", L"s eh 1 n - t ax - m iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"co", L"S IY 1 & OW 1", MS_Noun, L"k ah 1 m - p ax - n iy", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"ct", L"s iy 1 & t iy 1", MS_Noun, L"k ao 1 r t", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- MeasurementModifier
|
|
{ L"cu", L"S IY 1 & Y UW 1", MS_Noun, L"k y uw 1 - b ih k", MS_Noun, NULL, MS_Unknown, 0, 7 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"db", L"d eh 1 s - ax - b ax l", MS_Noun, L"d eh 1 s - ax - b ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"deg", L"d ih - g r iy 1", MS_Noun, L"d ih - g r iy 1 z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- DoctorDriveAbbreviation
|
|
{ L"dr", L"d aa 1 k - t er", MS_Noun, L"D R AY 1 V", MS_Noun, NULL, MS_Unknown, 0, 1 },
|
|
//--- DegreeAbbreviation
|
|
{ L"f", L"f ae 1 r - ax n - h ay 2 t", MS_Noun, L"eh 1 f", MS_Noun, NULL, MS_Unknown, 0, 6 },
|
|
//--- AbbreviationFollowedByDigit
|
|
{ L"fig", L"f ih 1 g - y er", MS_Noun, L"F IH 1 G", MS_Noun, NULL, MS_Unknown, 0, 2 },
|
|
//--- AbbreviationFollowedByDigit
|
|
{ L"figs", L"f ih 1 g - y er Z", MS_Noun, L"F IH 1 G Z", MS_Noun, NULL, MS_Unknown, 0, 2 },
|
|
//--- MeasurementModifier
|
|
{ L"fl", L"eh 1 f & eh 1 l", MS_Noun, L"f l uw 1 - ih d", MS_Noun, NULL, MS_Unknown, 0, 7 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"ft", L"F UH 1 T", MS_Noun, L"F IY 1 T", MS_Noun, L"F AO 1 R T", MS_Noun, 0, 0 },
|
|
//--- G, Gram, Grams
|
|
{ L"g", L"G R AE 1 M", MS_Noun, L"G R AE 1 M Z", MS_Noun, L"JH IY 1", MS_Noun, 0, 5 },
|
|
//--- DoctorDriveAbbreviation
|
|
{ L"gov", L"g ah 1 v - er - n ER", MS_Noun, L"G AH 1 V", MS_Noun, NULL, MS_Unknown, 0, 1 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"hi", L"EY 1 CH & AY 1", MS_Noun, L"H AY 1", MS_Interjection, NULL, MS_Unknown, 0, 3 },
|
|
//--- HR hour hours
|
|
{ L"hr", L"AW 1 ER", MS_Noun, L"AW 1 ER Z", MS_Noun, L"EY 1 CH AA 1 R", MS_Noun, 0, 5 },
|
|
//--- J, Joule, Joules
|
|
{ L"j", L"JH UW 1 L", MS_Noun, L"JH UW 1 L Z", MS_Noun, L"JH EY 1", MS_Noun, 0, 5 },
|
|
//--- DegreeAbbreviation
|
|
{ L"k", L"k eh 1 l - v ax n", MS_Noun, L"k ey 1", MS_Noun, NULL, MS_Unknown, 0, 6 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"kb", L"k ih 1 l - ax - b ay 2 t", MS_Noun, L"k ih 1 l - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"kcal", L"k ih 1 l - ax - k ae 2 l - ax - r iy", MS_Noun, L"k ih 1 l - ax - k ae 2 l - ax - r iy Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"kg", L"k ih 1 l - ax - g r ae 2 m", MS_Noun, L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"kj", L"k ih 1 l - ax - jh uw 2 l", MS_Noun, L"k ih 1 l - ax - jh uw 2 l z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"km", L"k ih - l aa 1 m - ih - t er", MS_Noun, L"k ih - l aa 1 m - ih - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"kw", L"k ih 1 l - ax - w aa 2 t", MS_Noun, L"k ih 1 l - ax - w aa 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- L, Liter, Liters
|
|
{ L"l", L"l iy 1 - t er", MS_Noun, L"l iy 1 - t er Z", MS_Noun, L"EH 1 L", MS_Noun, 0, 5 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"la", L"EH 1 L & EY 1", MS_Noun, L"L AH 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"lb", L"P AW 1 N D", MS_Noun, L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"lts", L"eh 1 l & t iy 1 & eh 1 s", MS_Noun, L"l uw t eh 1 n ax n t s", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- M, Meter, Meters
|
|
{ L"m", L"M IY 1 - T ER", MS_Noun, L"M IY 1 - T ER Z", MS_Noun, L"EH 1 M", MS_Noun, 0, 5 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"ma", L"EH 1 M & AA 1", MS_Noun, L"M AA 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- March Mar
|
|
{ L"mar", L"M AA 1 R CH", MS_Noun, L"M AA 1 R", MS_Verb, NULL, MS_Unknown, 0, 4 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"mb", L"m eh 1 g - ax - b ay 2 t", MS_Noun, L"m eh 1 g - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"me", L"EH 1 M & IY 1", MS_Noun, L"M IY 1", MS_ObjPron, NULL, MS_Unknown, 0, 3 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"mg", L"m ih 1 l - ax - g r ae 2 m", MS_Noun, L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"mi", L"M AY 1 L", MS_Noun, L"M AY 1 L Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"min", L"m ih 1 n - ax t", MS_Noun, L"m ih 1 n - ax t S", MS_Noun, L"m ih 1 n - ax - m ax m", MS_Noun, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"ml", L"m ih 1 l - ax - l iy 2 - t er", MS_Noun, L"m ih 1 l - ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"mm", L"m ih 1 l - ax - m iy 2 - t er", MS_Noun, L"m ih 1 l - ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"mpg", L"m ay 1 l & p er 1 & g ae 1 l ax n", MS_Noun, L"m ay 1 l z & p er 1 & g ae 1 l ax n", MS_Noun, L"eh 1 m & p iy 1 & jh iy 1", MS_Noun, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"mph", L"m ay 1 l & p er 1 & aw 1 er", MS_Noun, L"m ay 1 l z & p er 1 & aw 1 er", MS_Noun, L"eh 1 m & p iy 1 & ey 1 ch", MS_Noun, 0, 0 },
|
|
//--- MS millisecond milliseconds
|
|
{ L"ms", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, L"eh 1 m & eh 1 s", MS_Noun, 0, 5 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"msec", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"mt", L"EH 1 M & T IY 1", MS_Noun, L"M AW 1 N T", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"oh", L"OW 1 & EY 1 CH", MS_Noun, L"OW 1", MS_Interjection, NULL, MS_Unknown, 0, 3 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"or", L"OW 1 & AA 1 R", MS_Noun, L"AO 1 R", MS_CConj, NULL, MS_Unknown, 0, 3 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"oz", L"AW 1 N S", MS_Noun, L"AW 1 N S AX Z", MS_Noun, L"AA 1 Z", MS_Noun, 0, 0 },
|
|
//--- AbbreviationFollowedByDigit
|
|
{ L"p", L"P EY 1 JH", MS_Noun, L"P IY 1", MS_Noun, NULL, MS_Unknown, 0, 2 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"pa", L"P IY 1 & EY 1", MS_Noun, L"P AA 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"pg", L"P IY 1 & JH IY 1", MS_Noun, L"P EY 1 JH", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"po", L"p iy 1 & ow 1", MS_Noun, L"p ow 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- PolishDisambig
|
|
{ L"polish", L"p ow 1 l - ax sh", MS_Adj, L"p aa 1 l - ih sh", MS_Verb, L"p aa 1 l - ih sh", MS_Noun, 0, 9 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"qt", L"K W AO 1 R T", MS_Noun, L"K W AO 1 R T Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- Saturday Sat
|
|
{ L"sat", L"s ae 1 t - er - d ey", MS_Noun, L"S AE 1 T", MS_Verb, NULL, MS_Unknown, 0, 4 },
|
|
//--- SEC Second Seconds
|
|
{ L"sec", L"s eh 1 k - ax n d", MS_Noun, L"s eh 1 k - ax n d Z", MS_Noun, L"EH 1 S & IY 1 & S IY 1", MS_Noun, 0, 5 },
|
|
//--- MeasurementModifier
|
|
{ L"sq", L"S K W EH 1 R", MS_Noun, L"S K W EH 1 R", MS_Noun, NULL, MS_Unknown, 0, 7 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"sr", L"EH 1 S & AA 1 R", MS_Noun, L"s iy 1 n - y er", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- DoctorDriveAbbreviation
|
|
{ L"st", L"S EY 1 N T", MS_Noun, L"S T R IY 1 T", MS_Noun, NULL, MS_Unknown, 0, 1 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"tbsp", L"t ey 1 - b ax l - s p uw 2 n", MS_Noun, L"t ey 1 - b ax l - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"tsp", L"t iy 1 - s p uw 2 n", MS_Noun, L"t iy 1 - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"us", L"Y UW 1 & EH 1 S", MS_Noun, L"AH 1 S", MS_ObjPron, NULL, MS_Unknown, 0, 3 },
|
|
//--- Wednesday Wed
|
|
{ L"wed", L"w eh 1 n z - d ey", MS_Noun, L"W EH 1 D", MS_Verb, NULL, MS_Unknown, 0, 4 },
|
|
//--- AllCapsAbbreviation
|
|
{ L"wy", L"d ah 1 b - ax l - y uw & W AY 1", MS_Noun, L"W EY 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"yd", L"Y AA 1 R D", MS_Noun, L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- SingleOrPluralAbbreviation
|
|
{ L"yr", L"Y IY 1 R", MS_Noun, L"Y IY 1 R Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
};
|
|
|
|
//--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!!
|
|
AbbrevRecord g_PostLexLookupWordTable[] =
|
|
{
|
|
//--- MeasurementDisambig
|
|
{ L"bu", L"b uh 1 sh - ax l", MS_Noun, L"b uh 1 sh - ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"cal", L"k ae 1 l - ax - r iy", MS_Noun, L"k ae 1 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"cl", L"s eh 1 n - t ax - l iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"cm", L"s eh 1 n - t ax - m iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"db", L"d eh 1 s - ax - b ax l", MS_Noun, L"d eh 1 s - ax - b ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"deg", L"d ih - g r iy 1", MS_Noun, L"d ih - g r iy 1 z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"ft", L"F UH 1 T", MS_Noun, L"F IY 1 T", MS_Noun, L"F AO 1 R T", MS_Noun, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"g", L"G R AE 1 M", MS_Noun, L"G R AE 1 M Z", MS_Noun, L"JH IY 1", MS_Noun, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"gal", L"g ae 1 l - ax n", MS_Noun, L"g ae 1 l - ax n Z", MS_Noun, L"G AE 1 L", MS_Noun, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"hr", L"AW 1 ER", MS_Noun, L"AW 1 ER Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"in", L"IH 1 N CH", MS_Noun, L"IH 1 N CH AX Z", MS_Noun, L"IH 1 N", MS_Prep, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"j", L"JH UW 1 L", MS_Noun, L"JH UW 1 L Z", MS_Noun, L"JH EY 1", MS_Noun, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"kb", L"k ih 1 l - ax - b ay 2 t", MS_Noun, L"k ih 1 l - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"kcal", L"k ih 1 l - ax - k ae 2 l - ax - r iy", MS_Noun, L"k ih 1 l - ax - k ae 2 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"kg", L"k ih 1 l - ax - g r ae 2 m", MS_Noun, L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"kj", L"k ih 1 l - ax - jh uw 2 l", MS_Noun, L"k ih 1 l - ax - jh uw 2 l z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"km", L"k ih - l aa 1 m - ih - t er", MS_Noun, L"k ih - l aa 1 m - ih - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"kw", L"k ih 1 l - ax - w aa 2 t", MS_Noun, L"k ih 1 l - ax - w aa 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"l", L"l iy 1 - t er", MS_Noun, L"l iy 1 - t er Z", MS_Noun, L"EH 1 L", MS_Noun, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"lb", L"P AW 1 N D", MS_Noun, L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"m", L"M IY 1 - T ER", MS_Noun, L"M IY 1 - T ER Z", MS_Noun, L"EH 1 M", MS_Noun, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"mb", L"m eh 1 g - ax - b ay 2 t", MS_Noun, L"m eh 1 g - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"mg", L"m ih 1 l - ax - g r ae 2 m", MS_Noun, L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"mi", L"M AY 1 L", MS_Noun, L"M AY 1 L Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"min", L"m ih 1 n - ax t", MS_Noun, L"m ih 1 n - ax t S", MS_Noun, L"m ih 1 n - ax - m ax m", MS_Noun, 0, 0 },
|
|
//--- ReadDisambig
|
|
{ L"misread", L"m ih s - r iy 1 d", MS_Verb, L"m ih s - r eh 1 d", MS_Verb, NULL, MS_Unknown, 0, 2 },
|
|
//--- MeasurementDisambig
|
|
{ L"ml", L"m ih 1 l - ax - l iy 2 - t er", MS_Noun, L"m ih 1 l - ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"mm", L"m ih 1 l - ax - m iy 2 - t er", MS_Noun, L"m ih 1 l - ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"mpg", L"m ay 1 l & p er 1 & g ae 1 l ax n", MS_Noun, L"m ay 1 l z & p er 1 & g ae 1 l ax n", MS_Noun, L"eh 1 m & p iy 1 & jh iy 1", MS_Noun, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"mph", L"m ay 1 l & p er 1 & aw 1 er", MS_Noun, L"m ay 1 l z & p er 1 & aw 1 er", MS_Noun, L"eh 1 m & p iy 1 & ey 1 ch", MS_Noun, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"msec", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"oz", L"AW 1 N S", MS_Noun, L"AW 1 N S AX Z", MS_Noun, L"AA 1 Z", MS_Noun, 0, 0 },
|
|
//--- ReadDisambig
|
|
{ L"proofread", L"p r uw 1 f - r iy 2 d", MS_Verb, L"p r uw 1 f - r eh 2 d", MS_Verb, NULL, MS_Unknown, 0, 2 },
|
|
//--- MeasurementDisambig
|
|
{ L"qt", L"K W AO 1 R T", MS_Noun, L"K W AO 1 R T Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- ReadDisambig
|
|
{ L"read", L"R IY 1 D", MS_Verb, L"R EH 1 D", MS_Verb, NULL, MS_Unknown, 0, 2 },
|
|
//--- MeasurementDisambig
|
|
{ L"sec", L"S EH 1 k - ax n d", MS_Noun, L"s eh 1 k - ax n d z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"tbsp", L"t ey 1 - b ax l - s p uw 2 n", MS_Noun, L"t ey 1 - b ax l - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- TheDisambig
|
|
{ L"the", L"DH IY 2", MS_Det, L"DH AX 2", MS_Det, NULL, MS_Unknown, 0, 1 },
|
|
//--- MeasurementDisambig
|
|
{ L"tsp", L"t iy 1 - s p uw 2 n", MS_Noun, L"t iy 1 - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"yd", L"Y AA 1 R D", MS_Noun, L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
//--- MeasurementDisambig
|
|
{ L"yr", L"Y IY 1 R", MS_Noun, L"Y IY 1 R Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
|
|
|
|
};
|
|
|
|
//--- Phoneme string constant; presumably the pronunciation of the word pair "of a",
//    judging by the variable name - TODO confirm against the code that uses it.
//    NOTE(review): binds a wide string literal to a non-const WCHAR pointer, so the
//    pointed-to text must never be written through this pointer.
WCHAR *g_pOfA = L"ah 2 v & ax 2 &";
|
|
//--- Phoneme string constant; presumably the pronunciation of the word pair "of an",
//    judging by the variable name - TODO confirm against the code that uses it.
//    NOTE(review): binds a wide string literal to a non-const WCHAR pointer, so the
//    pointed-to text must never be written through this pointer.
WCHAR *g_pOfAn = L"ah 2 v & ax 2 n &";
|
|
|
|
//--- Sentence-break disambiguation functions, all members of CStdSentEnum.
//    The order of the entries is significant: presumably the word tables select one of
//    these by position (TODO confirm), so do not reorder or insert in the middle.
const SentBreakDisambigFunc g_SentBreakDisambigTable[] =
{
    CStdSentEnum::IsAbbreviationEOS,                    // [0]
    CStdSentEnum::IfEOSNotAbbreviation,                 // [1]
    CStdSentEnum::IfEOSAndLowercaseNotAbbreviation,     // [2]
};
|
|
|
|
//--- Pronunciation disambiguation functions, all members of CStdSentEnum.
//    The order of the entries is significant: presumably abbreviation records select
//    one of these by position (TODO confirm), so do not reorder or insert in the middle.
const PronDisambigFunc g_PronDisambigTable[] =
{
    CStdSentEnum::SingleOrPluralAbbreviation,       // [0]
    CStdSentEnum::DoctorDriveAbbreviation,          // [1]
    CStdSentEnum::AbbreviationFollowedByDigit,      // [2]
    CStdSentEnum::DegreeAbbreviation,               // [3]
    CStdSentEnum::AbbreviationModifier,             // [4]
};
|
|
|
|
//--- Pronunciation disambiguation functions for ambiguous words, all members of
//    CStdSentEnum.  The order of the entries is significant: presumably ambiguous-word
//    records select one of these by position (TODO confirm), so do not reorder or
//    insert in the middle.
const PronDisambigFunc g_AmbiguousWordDisambigTable[] =
{
    CStdSentEnum::SingleOrPluralAbbreviation,       // [0]
    CStdSentEnum::DoctorDriveAbbreviation,          // [1]
    CStdSentEnum::AbbreviationFollowedByDigit,      // [2]
    CStdSentEnum::AllCapsAbbreviation,              // [3]
    CStdSentEnum::CapitalizedAbbreviation,          // [4]
    CStdSentEnum::SECAbbreviation,                  // [5]
    CStdSentEnum::DegreeAbbreviation,               // [6]
    CStdSentEnum::AbbreviationModifier,             // [7]
    CStdSentEnum::ADisambig,                        // [8]
    CStdSentEnum::PolishDisambig,                   // [9]
};
|
|
|
|
//--- Post-lexicon-lookup disambiguation functions, all members of CStdSentEnum.
//    The order of the entries is significant: the tag comments and final field of the
//    rows in g_PostLexLookupWordTable line up with these positions, so do not reorder
//    or insert in the middle.
const PostLexLookupDisambigFunc g_PostLexLookupDisambigTable[] =
{
    CStdSentEnum::MeasurementDisambig,      // [0]
    CStdSentEnum::TheDisambig,              // [1]
    CStdSentEnum::ReadDisambig,             // [2]
};
|
|
|
|
//--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!!
|
|
// This is an alphabetized list of all non-proper-noun words which
|
|
// appear within the list of the 200 most frequent first words
|
|
// in sentences in both the Brown and WSJ corpora.
|
|
const SPLSTR g_FirstWords[] =
|
|
{
|
|
DEF_SPLSTR( "A" ),
|
|
DEF_SPLSTR( "About" ),
|
|
DEF_SPLSTR( "According" ),
|
|
DEF_SPLSTR( "After" ),
|
|
DEF_SPLSTR( "Again" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "All" ),
|
|
DEF_SPLSTR( "Also" ),
|
|
DEF_SPLSTR( "Although" ),
|
|
DEF_SPLSTR( "Among" ),
|
|
DEF_SPLSTR( "An" ),
|
|
DEF_SPLSTR( "And" ),
|
|
DEF_SPLSTR( "Another" ),
|
|
DEF_SPLSTR( "Any" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Anyway" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Are" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "As" ),
|
|
DEF_SPLSTR( "At" ),
|
|
DEF_SPLSTR( "Back" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Because" ),
|
|
DEF_SPLSTR( "Before" ),
|
|
DEF_SPLSTR( "Besides" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Both" ),
|
|
DEF_SPLSTR( "But" ),
|
|
DEF_SPLSTR( "By" ),
|
|
DEF_SPLSTR( "Can" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Consequently" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Dear" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Despite" ),
|
|
DEF_SPLSTR( "Did" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Do" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Does" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Don't" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "During" ),
|
|
DEF_SPLSTR( "Each" ),
|
|
DEF_SPLSTR( "Early" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Even" ),
|
|
DEF_SPLSTR( "Every" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Finally" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "First" ),
|
|
DEF_SPLSTR( "Following" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "For" ),
|
|
DEF_SPLSTR( "Four" ),
|
|
DEF_SPLSTR( "From" ),
|
|
DEF_SPLSTR( "Further" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Furthermore" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Generally" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Given" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Go" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Great" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Had" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Have" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Having" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "He" ),
|
|
DEF_SPLSTR( "Her" ),
|
|
DEF_SPLSTR( "Here" ),
|
|
DEF_SPLSTR( "His" ),
|
|
DEF_SPLSTR( "How" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "However" ),
|
|
DEF_SPLSTR( "I" ),
|
|
DEF_SPLSTR( "If" ),
|
|
DEF_SPLSTR( "In" ),
|
|
DEF_SPLSTR( "Indeed" ),
|
|
DEF_SPLSTR( "Initially" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Instead" ),
|
|
DEF_SPLSTR( "Is" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "It" ),
|
|
DEF_SPLSTR( "Its" ),
|
|
DEF_SPLSTR( "Just" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Last" ),
|
|
DEF_SPLSTR( "Later" ),
|
|
DEF_SPLSTR( "Let" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Like" ),
|
|
DEF_SPLSTR( "Many" ),
|
|
DEF_SPLSTR( "Maybe" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Meanwhile" ),
|
|
DEF_SPLSTR( "More" ),
|
|
DEF_SPLSTR( "Moreover" ),
|
|
DEF_SPLSTR( "Most" ),
|
|
DEF_SPLSTR( "Much" ),
|
|
DEF_SPLSTR( "My" ), // Added to fix bug #385
|
|
DEF_SPLSTR( "Neither" ),
|
|
DEF_SPLSTR( "Never" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Nevertheless" ),
|
|
DEF_SPLSTR( "New" ),
|
|
DEF_SPLSTR( "Next" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "No" ),
|
|
DEF_SPLSTR( "None" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Nonetheless" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Nor" ),
|
|
DEF_SPLSTR( "Not" ),
|
|
DEF_SPLSTR( "Nothing" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Now" ),
|
|
DEF_SPLSTR( "Of" ),
|
|
DEF_SPLSTR( "On" ),
|
|
DEF_SPLSTR( "Once" ),
|
|
DEF_SPLSTR( "One" ),
|
|
DEF_SPLSTR( "Only" ),
|
|
DEF_SPLSTR( "Or" ),
|
|
DEF_SPLSTR( "Other" ),
|
|
DEF_SPLSTR( "Others" ),
|
|
DEF_SPLSTR( "Our" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Over" ),
|
|
DEF_SPLSTR( "People" ),
|
|
DEF_SPLSTR( "Perhaps" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Please" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Previous" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Recent" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Right" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Second" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "See" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Several" ),
|
|
DEF_SPLSTR( "She" ),
|
|
DEF_SPLSTR( "Shortly" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Similarly" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Since" ),
|
|
DEF_SPLSTR( "So" ),
|
|
DEF_SPLSTR( "Some" ),
|
|
DEF_SPLSTR( "Sometimes" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Soon" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Still" ),
|
|
DEF_SPLSTR( "Subsequently" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Such" ),
|
|
DEF_SPLSTR( "Take" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "That" ),
|
|
DEF_SPLSTR( "The" ),
|
|
DEF_SPLSTR( "Their" ),
|
|
DEF_SPLSTR( "Then" ),
|
|
DEF_SPLSTR( "There" ),
|
|
DEF_SPLSTR( "Thereafter" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Therefore" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "These" ),
|
|
DEF_SPLSTR( "They" ),
|
|
DEF_SPLSTR( "This" ),
|
|
DEF_SPLSTR( "Those" ),
|
|
DEF_SPLSTR( "Though" ),
|
|
DEF_SPLSTR( "Three" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Through" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Thus" ),
|
|
DEF_SPLSTR( "To" ),
|
|
DEF_SPLSTR( "Today" ),
|
|
DEF_SPLSTR( "Two" ),
|
|
DEF_SPLSTR( "Under" ),
|
|
DEF_SPLSTR( "Unlike" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Until" ),
|
|
DEF_SPLSTR( "Upon" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "We" ),
|
|
DEF_SPLSTR( "Well" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "What" ),
|
|
DEF_SPLSTR( "When" ),
|
|
DEF_SPLSTR( "Where" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Whether" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Which" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "While" ),
|
|
DEF_SPLSTR( "Who" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Why" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Will" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "With" ),
|
|
DEF_SPLSTR( "Within" ),
|
|
DEF_SPLSTR( "Without" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Yes" ), // PaulCa added 4/14/99 (Bug 107)
|
|
DEF_SPLSTR( "Yet" ),
|
|
DEF_SPLSTR( "You" ),
|
|
DEF_SPLSTR( "Your" ),
|
|
}; |