315 lines
8.7 KiB
C++
315 lines
8.7 KiB
C++
|
#include "base.h"
|
|||
|
#include "SpanishUtils.h"
|
|||
|
|
|||
|
CAutoClassPointer<CSpanishUtil> g_apSpanishUtil = NULL;
|
|||
|
|
|||
|
const CSuffixTerm g_rSpanishSuffix[] =
|
|||
|
{
|
|||
|
{L"et" ,2, 2, TYPE1}, // te
|
|||
|
{L"es" ,2, 2, TYPE1}, // se
|
|||
|
{L"em" ,2, 2, TYPE1}, // me
|
|||
|
{L"son" ,3, 3, TYPE1}, // nos
|
|||
|
{L"sol" ,3, 3, TYPE1}, // los
|
|||
|
{L"sal" ,3, 3, TYPE1}, // las
|
|||
|
{L"sel" ,3, 3, TYPE1}, // les
|
|||
|
{L"ol" ,2, 2, TYPE1}, // lo
|
|||
|
{L"el" ,2, 2, TYPE1}, // le
|
|||
|
{L"al" ,2, 2, TYPE1}, // la
|
|||
|
{L"etes",4, 4, TYPE1}, // sete
|
|||
|
|
|||
|
#ifdef DICT_GEN
|
|||
|
{L"odn\x0e1" ,4, 3, TYPE2}, // <20>ndo
|
|||
|
#endif
|
|||
|
{L"etodn\x0e1" ,6, 5, TYPE2}, // <20>ndote
|
|||
|
{L"esodn\x0e1" ,6, 5, TYPE2}, // <20>ndose
|
|||
|
{L"emodn\x0e1" ,6, 5, TYPE2}, // <20>ndome
|
|||
|
{L"olodn\x0e1" ,6, 5, TYPE2}, // <20>ndolo
|
|||
|
{L"elodn\x0e1" ,6, 5, TYPE2}, // <20>ndole
|
|||
|
{L"alodn\x0e1" ,6, 5, TYPE2}, // <20>ndola
|
|||
|
{L"sonodn\x0e1",7, 6, TYPE2}, // <20>ndonos
|
|||
|
{L"solodn\x0e1",7, 6, TYPE2}, // <20>ndolos
|
|||
|
{L"salodn\x0e1",7, 6, TYPE2}, // <20>ndolas
|
|||
|
{L"selodn\x0e1",7, 6, TYPE2}, // <20>ndoles
|
|||
|
|
|||
|
{L"etne" ,4, 3, TYPE3}, //ente
|
|||
|
{L"esne" ,4, 3, TYPE3}, //en se
|
|||
|
{L"emne" ,4, 3, TYPE3}, //enme
|
|||
|
{L"sonne" ,5, 4, TYPE3}, //ennos
|
|||
|
{L"solne" ,5, 4, TYPE3}, //enlos
|
|||
|
{L"salne" ,5, 4, TYPE3}, //enlas
|
|||
|
{L"selne" ,5, 4, TYPE3}, //enles
|
|||
|
{L"olne" ,4, 3, TYPE3}, //enlo
|
|||
|
{L"elne" ,4, 3, TYPE3}, //enle
|
|||
|
{L"alne" ,4, 3, TYPE3}, //enla
|
|||
|
{L"emetne",6, 5, TYPE3}, //enteme
|
|||
|
|
|||
|
{L"etsom" ,5, 5, TYPE4}, //moste
|
|||
|
{L"essom" ,5, 5, TYPE4}, //mosse
|
|||
|
{L"emsom" ,5, 5, TYPE4}, //mosme
|
|||
|
{L"sonsom" ,6, 6, TYPE4}, //mosnos
|
|||
|
{L"solsom" ,6, 6, TYPE4}, //moslos
|
|||
|
{L"salsom" ,6, 6, TYPE4}, //moslas
|
|||
|
{L"selsom" ,6, 6, TYPE4}, //mosles
|
|||
|
{L"olsom" ,5, 5, TYPE4}, //moslo
|
|||
|
{L"elsom" ,5, 5, TYPE4}, //mosle
|
|||
|
{L"alsom" ,5, 5, TYPE4}, //mosla
|
|||
|
{L"etessom",7, 7, TYPE4}, //mossete
|
|||
|
|
|||
|
{L"soetda",6, 5, TYPE5}, // adteos
|
|||
|
{L"emetda",6, 5, TYPE5}, // adteme
|
|||
|
{L"etda" ,4, 3, TYPE5}, // adte
|
|||
|
{L"esda" ,4, 3, TYPE5}, // adse
|
|||
|
{L"emda" ,4, 3, TYPE5}, // adem
|
|||
|
{L"sonda" ,5, 4, TYPE5}, // adnos
|
|||
|
{L"solda" ,5, 4, TYPE5}, // adlos
|
|||
|
{L"salda" ,5, 4, TYPE5}, // adlas
|
|||
|
{L"selda" ,5, 4, TYPE5}, // adles
|
|||
|
{L"olda" ,4, 3, TYPE5}, // adlo
|
|||
|
{L"elda" ,4, 3, TYPE5}, // adle
|
|||
|
{L"alda" ,4, 3, TYPE5}, // adla
|
|||
|
|
|||
|
{L"etr\x0e1" ,4, 3, TYPE6}, // <20>rte
|
|||
|
{L"esr\x0e1" ,4, 3, TYPE6}, // <20>rse
|
|||
|
{L"emr\x0e1" ,4, 3, TYPE6}, // <20>rme
|
|||
|
{L"sonr\x0e1",5, 4, TYPE6}, // <20>rnos
|
|||
|
{L"solr\x0e1",5, 4, TYPE6}, // <20>rlos
|
|||
|
{L"salr\x0e1",5, 4, TYPE6}, // <20>rlas
|
|||
|
{L"selr\x0e1",5, 4, TYPE6}, // <20>rles
|
|||
|
{L"olr\x0e1" ,4, 3, TYPE6}, // <20>rlo
|
|||
|
{L"elr\x0e1" ,4, 3, TYPE6}, // <20>rle
|
|||
|
{L"alr\x0e1" ,4, 3, TYPE6}, // <20>rla
|
|||
|
|
|||
|
{L"emes" ,4, 4, TYPE7}, // seme
|
|||
|
{L"sones",5, 5, TYPE7}, // senos
|
|||
|
{L"soles",5, 5, TYPE7}, // selos
|
|||
|
{L"oles" ,4, 4, TYPE7}, // selo
|
|||
|
{L"seles",5, 5, TYPE7}, // seles
|
|||
|
{L"eles" ,4, 4, TYPE7}, // sele
|
|||
|
{L"sales",5, 5, TYPE7}, // sesal
|
|||
|
{L"ales" ,4, 4, TYPE7}, // sela
|
|||
|
|
|||
|
{L"emem", 4, 4, TYPE16}, // meme
|
|||
|
{L"sonem",5, 5, TYPE16}, // menos
|
|||
|
|
|||
|
{L"solem",5, 5, TYPE8}, // melos
|
|||
|
{L"olem" ,4, 4, TYPE8}, // melo
|
|||
|
{L"selem",5, 5, TYPE8}, // meles
|
|||
|
{L"elem" ,4, 4, TYPE8}, // mele
|
|||
|
{L"salem",5, 5, TYPE8}, // mesal
|
|||
|
{L"alem" ,4, 4, TYPE8}, // mela
|
|||
|
|
|||
|
{L"emet" ,4, 4, TYPE9}, // teme
|
|||
|
{L"sonet",5, 5, TYPE9}, // tenos
|
|||
|
{L"solet",5, 5, TYPE9}, // telos
|
|||
|
{L"olet" ,4, 4, TYPE9}, // telo
|
|||
|
{L"selet",5, 5, TYPE9}, // teles
|
|||
|
{L"elet" ,4, 4, TYPE9}, // tele
|
|||
|
{L"salet",5, 5, TYPE9}, // tesal
|
|||
|
{L"alet" ,4, 4, TYPE9}, // tela
|
|||
|
|
|||
|
{L"etsoets\x0e9",8, 4, TYPE10}, // <20>steoste
|
|||
|
{L"soets\x0e9" ,6, 2, TYPE10}, // <20>steos
|
|||
|
|
|||
|
{L"sole",4, 0,TYPE11}, // elos
|
|||
|
{L"ole" ,3, 0,TYPE11}, // elo
|
|||
|
{L"eme" ,3, 0,TYPE11}, // eme
|
|||
|
{L"sele",4, 0,TYPE11}, // eles
|
|||
|
{L"ele" ,3, 0,TYPE11}, // ele
|
|||
|
{L"sale",4, 0,TYPE11}, // elas
|
|||
|
{L"ale" ,3, 0,TYPE11}, // ela
|
|||
|
|
|||
|
{L"sona",4, 0,TYPE12}, // anos
|
|||
|
|
|||
|
{L"ese",3, 0, TYPE13}, // ese
|
|||
|
{L"esa",3, 0, TYPE13}, // ase
|
|||
|
|
|||
|
{L"sone",4, 0,TYPE14}, // enos
|
|||
|
|
|||
|
{L"olner",5, 5, TYPE15}, // renlo
|
|||
|
|
|||
|
{L"\0",0,0,0}
|
|||
|
};
|
|||
|
|
|||
|
|
|||
|
CSpanishUtil::CSpanishUtil()
|
|||
|
{
|
|||
|
WCHAR wch;
|
|||
|
for (wch = 0; wch < 256; wch++)
|
|||
|
{
|
|||
|
m_rCharConvert[wch] = towupper(wch);
|
|||
|
m_rAccentConvert[wch] = 0;
|
|||
|
m_rCharCompress[wch] = 0;
|
|||
|
}
|
|||
|
|
|||
|
memset(m_rReverseAccentConvert, 0, sizeof(char) * 16);
|
|||
|
|
|||
|
m_rCharConvert[0xc0] = L'A';
|
|||
|
m_rCharConvert[0xc1] = L'A';
|
|||
|
m_rCharConvert[0xc2] = L'A';
|
|||
|
m_rCharConvert[0xc3] = L'A';
|
|||
|
m_rCharConvert[0xc4] = L'A';
|
|||
|
m_rCharConvert[0xc5] = L'A';
|
|||
|
m_rCharConvert[0xc8] = L'E';
|
|||
|
m_rCharConvert[0xc9] = L'E';
|
|||
|
m_rCharConvert[0xca] = L'E';
|
|||
|
m_rCharConvert[0xcb] = L'E';
|
|||
|
m_rCharConvert[0xcc] = L'I';
|
|||
|
m_rCharConvert[0xcd] = L'I';
|
|||
|
m_rCharConvert[0xce] = L'I';
|
|||
|
m_rCharConvert[0xcf] = L'I';
|
|||
|
m_rCharConvert[0xd2] = L'O';
|
|||
|
m_rCharConvert[0xd3] = L'O';
|
|||
|
m_rCharConvert[0xd4] = L'O';
|
|||
|
m_rCharConvert[0xd5] = L'O';
|
|||
|
m_rCharConvert[0xd6] = L'O';
|
|||
|
m_rCharConvert[0xd9] = L'U';
|
|||
|
m_rCharConvert[0xda] = L'U';
|
|||
|
m_rCharConvert[0xdb] = L'U';
|
|||
|
m_rCharConvert[0xdc] = L'U';
|
|||
|
|
|||
|
m_rCharConvert[0xe0] = L'A';
|
|||
|
m_rCharConvert[0xe1] = L'A';
|
|||
|
m_rCharConvert[0xe2] = L'A';
|
|||
|
m_rCharConvert[0xe3] = L'A';
|
|||
|
m_rCharConvert[0xe4] = L'A';
|
|||
|
m_rCharConvert[0xe5] = L'A';
|
|||
|
m_rCharConvert[0xe8] = L'E';
|
|||
|
m_rCharConvert[0xe9] = L'E';
|
|||
|
m_rCharConvert[0xea] = L'E';
|
|||
|
m_rCharConvert[0xeb] = L'E';
|
|||
|
m_rCharConvert[0xec] = L'I';
|
|||
|
m_rCharConvert[0xed] = L'I';
|
|||
|
m_rCharConvert[0xee] = L'I';
|
|||
|
m_rCharConvert[0xef] = L'I';
|
|||
|
m_rCharConvert[0xf2] = L'O';
|
|||
|
m_rCharConvert[0xf3] = L'O';
|
|||
|
m_rCharConvert[0xf4] = L'O';
|
|||
|
m_rCharConvert[0xf5] = L'O';
|
|||
|
m_rCharConvert[0xf6] = L'O';
|
|||
|
m_rCharConvert[0xf9] = L'U';
|
|||
|
m_rCharConvert[0xfa] = L'U';
|
|||
|
m_rCharConvert[0xfb] = L'U';
|
|||
|
m_rCharConvert[0xfc] = L'U';
|
|||
|
|
|||
|
for (wch = 0; wch < 256; wch++)
|
|||
|
{
|
|||
|
if (m_rCharConvert[wch] >= L'A' && m_rCharConvert[wch] <= L'Z')
|
|||
|
{
|
|||
|
m_rCharCompress[wch] = m_rCharConvert[wch] - L'A' + 1;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
m_rCharCompress[0xD1] = 28;
|
|||
|
m_rCharCompress[0xF1] = 28;
|
|||
|
|
|||
|
|
|||
|
m_rAccentConvert[0xe1] = 1;
|
|||
|
m_rAccentConvert[0xf3] = 2;
|
|||
|
m_rAccentConvert[0xcd] = 3;
|
|||
|
m_rAccentConvert[0xe9] = 4;
|
|||
|
m_rAccentConvert[0xfa] = 5;
|
|||
|
m_rAccentConvert[0xfc] = 6;
|
|||
|
m_rAccentConvert[0x61] = 7;
|
|||
|
m_rAccentConvert[0x6f] = 8;
|
|||
|
m_rAccentConvert[0x69] = 9;
|
|||
|
m_rAccentConvert[0x65] = 10;
|
|||
|
m_rAccentConvert[0x75] = 11;
|
|||
|
|
|||
|
m_rReverseAccentConvert[1] = (WCHAR)0xe1;
|
|||
|
m_rReverseAccentConvert[2] = (WCHAR)0xf3;
|
|||
|
m_rReverseAccentConvert[3] = (WCHAR)0xcd;
|
|||
|
m_rReverseAccentConvert[4] = (WCHAR)0xe9;
|
|||
|
m_rReverseAccentConvert[5] = (WCHAR)0xfa;
|
|||
|
m_rReverseAccentConvert[6] = (WCHAR)0xfc;
|
|||
|
m_rReverseAccentConvert[7] = (WCHAR)0x61;
|
|||
|
m_rReverseAccentConvert[8] = (WCHAR)0x6f;
|
|||
|
m_rReverseAccentConvert[9] = (WCHAR)0x69;
|
|||
|
m_rReverseAccentConvert[10] = (WCHAR)0x65;
|
|||
|
m_rReverseAccentConvert[11] = (WCHAR)0x75;
|
|||
|
|
|||
|
}
|
|||
|
|
|||
|
int CSpanishUtil::aiWcscmp(const WCHAR* p, const WCHAR* t)
|
|||
|
{
|
|||
|
while (*p && *t && (m_rCharConvert[*p] == m_rCharConvert[*t]))
|
|||
|
{
|
|||
|
p++;
|
|||
|
t++;
|
|||
|
}
|
|||
|
|
|||
|
if ((m_rCharConvert[*p] == m_rCharConvert[*t]))
|
|||
|
{
|
|||
|
return 0;
|
|||
|
}
|
|||
|
if ((m_rCharConvert[*p] > m_rCharConvert[*t]))
|
|||
|
{
|
|||
|
return 1;
|
|||
|
}
|
|||
|
|
|||
|
return -1;
|
|||
|
}
|
|||
|
|
|||
|
int CSpanishUtil::aiStrcmp(const unsigned char* p, const unsigned char* t)
|
|||
|
{
|
|||
|
while (*p && *t && (m_rCharConvert[*p] == m_rCharConvert[*t]))
|
|||
|
{
|
|||
|
p++;
|
|||
|
t++;
|
|||
|
}
|
|||
|
|
|||
|
if (m_rCharConvert[*p] == m_rCharConvert[*t])
|
|||
|
{
|
|||
|
return 0;
|
|||
|
}
|
|||
|
if (m_rCharConvert[*p] > m_rCharConvert[*t])
|
|||
|
{
|
|||
|
return 1;
|
|||
|
}
|
|||
|
|
|||
|
return -1;
|
|||
|
}
|
|||
|
|
|||
|
int CSpanishUtil::aiWcsncmp(const WCHAR* p, const WCHAR* t, const int iLen)
|
|||
|
{
|
|||
|
int i = 0;
|
|||
|
while ((i < iLen) && *p && *t && (m_rCharConvert[*p] == m_rCharConvert[*t]))
|
|||
|
{
|
|||
|
p++;
|
|||
|
t++;
|
|||
|
i++;
|
|||
|
}
|
|||
|
|
|||
|
if ((i == iLen) || (m_rCharConvert[*p] == m_rCharConvert[*t]))
|
|||
|
{
|
|||
|
return 0;
|
|||
|
}
|
|||
|
if (m_rCharConvert[*p] > m_rCharConvert[*t])
|
|||
|
{
|
|||
|
return 1;
|
|||
|
}
|
|||
|
|
|||
|
return -1;
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
CSpanishSuffixDict::CSpanishSuffixDict()
|
|||
|
{
|
|||
|
WCHAR* pwcsCur;
|
|||
|
int i;
|
|||
|
DictStatus status;
|
|||
|
|
|||
|
for (i = 0, pwcsCur = g_rSpanishSuffix[i].pwcs;
|
|||
|
*pwcsCur != L'\0';
|
|||
|
i++, pwcsCur = g_rSpanishSuffix[i].pwcs)
|
|||
|
{
|
|||
|
status = m_SuffixTrie.trie_Insert(
|
|||
|
pwcsCur,
|
|||
|
TRIE_IGNORECASE,
|
|||
|
const_cast<CSuffixTerm*>(&g_rSpanishSuffix[i]),
|
|||
|
NULL);
|
|||
|
|
|||
|
Assert (DICT_SUCCESS == status);
|
|||
|
|
|||
|
}
|
|||
|
}
|