windows-nt/Source/XPSP1/NT/inetsrv/intlwb/enu/wordbreaker/spanishtokenizer.cpp

139 lines
3.8 KiB
C++
Raw Permalink Normal View History

2020-09-26 03:20:57 -05:00
#include "base.h"
#include "SpanishTokenizer.h"
#include "WbUtils.h"
// Global holder for the Spanish dictionary object. It starts out empty and is
// filled in by the CSpanishTokenizer constructor the first time a tokenizer is
// constructed while Get() still returns NULL; OutputSimpleToken then uses it
// to look up alternative forms of a word.
CAutoClassPointer<CSpanishDict> g_apSpanishDict;
//
// Builds a Spanish tokenizer. Every argument is passed through unchanged to
// the CTokenizer base constructor; the body only makes sure that the global
// helper objects (g_apSpanishUtil and g_apSpanishDict) have been created.
//
// g_apSpanishDict is tested once before the CSyncMutexCatcher guard is
// constructed and once again afterwards, so that a tokenizer created when the
// dictionary already exists does not touch m_csSpanishDictInit at all.
// NOTE(review): CSyncMutexCatcher presumably holds m_csSpanishDictInit until
// it goes out of scope - confirm against its declaration.
//
CSpanishTokenizer::CSpanishTokenizer(
    TEXT_SOURCE* pTxtSource,
    IWordSink * pWordSink,
    IPhraseSink * pPhraseSink,
    LCID lcid,
    BOOL bQueryTime,
    ULONG ulMaxTokenSize) :
    CTokenizer(pTxtSource, pWordSink, pPhraseSink, lcid, bQueryTime, ulMaxTokenSize)
{
    // Common case: the dictionary is already there, nothing to set up.
    if (NULL != g_apSpanishDict.Get())
    {
        return;
    }

    CSyncMutexCatcher cs(m_csSpanishDictInit);

    // Re-test now that the guard has been constructed; the dictionary may
    // have been created in the meantime.
    if (NULL != g_apSpanishDict.Get())
    {
        return;
    }

    // Path of the dictionary file handed to the CSpanishDict constructor.
    CAutoArrayPointer<WCHAR> apwcsPath;
    apwcsPath = CreateFilePath(L"SpanishDict.txt");

    // The utility object is created before the dictionary.
    if (NULL == g_apSpanishUtil.Get())
    {
        g_apSpanishUtil = new CSpanishUtil;
    }

    if (NULL == g_apSpanishDict.Get())
    {
        g_apSpanishDict = new CSpanishDict(apwcsPath.Get());
    }
}
//
// Sends the token described by State to the word sink.
//
// State - supplies the token buffer (m_pwcsToken) and the [m_ulStart, m_ulEnd)
//         range of the word inside it.
// pTerm - clitic description; ulOp selects the handling and ulLen is the
//         number of characters to strip for the truncating operations.
//
// Truncating operations (TAIL_MATCH_TRUNCATE / HEAD_MATCH_TRUNCATE):
//   nothing is emitted when stripping ulLen characters would leave an empty
//   word; otherwise the full word goes out through PutAltWord and the
//   stripped word through PutWord.
// Any other operation:
//   words shorter than 32 characters are passed to g_apSpanishDict->BreakWord;
//   if it reports an alternative, that alternative goes out through
//   PutAltWord. The full word is then always emitted through PutWord.
//
// In every sink call the source length is the full word length and the source
// position is the offset computed by CalculateStateOffsetInTxtSourceBuffer.
// A failing HRESULT from the sink is raised via THROW_HRESULT_EXCEPTION.
//
void CSpanishTokenizer::OutputSimpleToken(
    CTokenState& State,
    const CCliticsTerm* pTerm)
{
    // Computed first, before any field of State is read.
    const ULONG ulSrcOffset =
        m_pCurToken->CalculateStateOffsetInTxtSourceBuffer(State);

    const ULONG ulWordLen = State.m_ulEnd - State.m_ulStart;

    const bool bStripTail = (TAIL_MATCH_TRUNCATE == pTerm->ulOp);
    const bool bStripHead = (HEAD_MATCH_TRUNCATE == pTerm->ulOp);

    if (bStripTail || bStripHead)
    {
        const ULONG ulStemLen = ulWordLen - pTerm->ulLen;
        if (0 == ulStemLen)
        {
            // The clitic covers the whole word - there is nothing to emit.
            return;
        }

        // Full, untruncated word as the alternative form.
        HRESULT hrSink = m_apWordSink->PutAltWord(
            ulWordLen,
            &State.m_pwcsToken[State.m_ulStart],
            ulWordLen,
            ulSrcOffset);
        if (FAILED(hrSink))
        {
            THROW_HRESULT_EXCEPTION(hrSink);
        }

        // A tail strip keeps the beginning of the word; a head strip skips
        // the first ulLen characters.
        ULONG ulStemStart = State.m_ulStart;
        if (!bStripTail)
        {
            Assert(pTerm->ulOp == HEAD_MATCH_TRUNCATE);
            ulStemStart += pTerm->ulLen;
        }

        hrSink = m_apWordSink->PutWord(
            ulStemLen,
            &State.m_pwcsToken[ulStemStart],
            ulWordLen,
            ulSrcOffset);
        if (FAILED(hrSink))
        {
            THROW_HRESULT_EXCEPTION(hrSink);
        }
        return;
    }

    // No truncation requested: consult the dictionary for an alternative.
    const ULONG ulAltBufLen = 32;
    WCHAR pwcsAlt[ulAltBufLen];
    ULONG ulAltLen;
    bool bHasAlt = false;

    // Longer words are not looked up at all.
    if (ulWordLen < ulAltBufLen)
    {
        g_apSpanishDict->BreakWord(
            ulWordLen,
            State.m_pwcsToken + State.m_ulStart,
            &bHasAlt,
            &ulAltLen,
            pwcsAlt);
    }

    HRESULT hr;
    if (bHasAlt)
    {
        hr = m_apWordSink->PutAltWord(
            ulAltLen,
            pwcsAlt,
            ulWordLen,
            ulSrcOffset);
        if (FAILED(hr))
        {
            THROW_HRESULT_EXCEPTION(hr);
        }
    }

    hr = m_apWordSink->PutWord(
        ulWordLen,
        &State.m_pwcsToken[State.m_ulStart],
        ulWordLen,
        ulSrcOffset);
    if (FAILED(hr))
    {
        THROW_HRESULT_EXCEPTION(hr);
    }
}