windows-nt/Source/XPSP1/NT/shell/ext/mlang/hzgbobj.cpp
2020-09-26 16:20:57 +08:00

390 lines
11 KiB
C++

// ============================================================================
// Internet Character Set Conversion: Input from HZ-GB-2312
// ============================================================================
#include "private.h"
#include "fechrcnv.h"
#include "hzgbobj.h"
#include "codepage.h"
/******************************************************************************
************************** C O N S T R U C T O R **************************
******************************************************************************/
// Builds an HZ-GB-2312 input converter. The code page and code set are passed
// straight to the CINetCodeConverter base; the decoder state is then put into
// its start state by Reset().
CInccHzGbIn::CInccHzGbIn(UINT uCodePage, int nCodeSet)
    : CINetCodeConverter(uCodePage, nCodeSet)
{
    Reset();
}
/******************************************************************************
******************************* R E S E T *********************************
******************************************************************************/
// Returns the decoder to its start state: the main conversion and clean-up
// handlers are selected, GB mode is off, and neither a lead byte nor an
// escape byte is pending.
void CInccHzGbIn::Reset()
{
    m_nESCBytes  = 0;
    m_tcLeadByte = 0;
    m_fGBMode    = FALSE;
    m_pfnCleanUp = &CInccHzGbIn::CleanUpMain;
    m_pfnConv    = &CInccHzGbIn::ConvMain;
}
/******************************************************************************
************************* C O N V E R T C H A R *************************
******************************************************************************/
// Hands one source byte to the state handler currently selected in m_pfnConv.
//
//   tc      - next source byte.
//   cchSrc  - not used by this converter.
//
// Returns S_OK when the handler reports success, E_FAIL otherwise.
HRESULT CInccHzGbIn::ConvertChar(UCHAR tc, int cchSrc)
{
    return (this->*m_pfnConv)(tc) ? S_OK : E_FAIL;
}
/******************************************************************************
***************************** C L E A N U P *****************************
******************************************************************************/
// Runs the clean-up handler that matches the current decoder state and
// returns whatever that handler reports.
BOOL CInccHzGbIn::CleanUp()
{
    const BOOL fCleaned = (this->*m_pfnCleanUp)();
    return fCleaned;
}
/******************************************************************************
**************************** C O N V M A I N ****************************
******************************************************************************/
// Default state handler.
//
// Outside GB mode a '~' starts an escape sequence (handled by ConvTilde);
// every other byte is passed to Output() unchanged.
// Inside GB mode a byte in 0x20..0x7e is remembered as the first byte of a
// pair (handled by ConvDoubleByte); every other byte is passed to Output()
// unchanged.
//
// Returns TRUE when the byte was only recorded, otherwise the result of
// Output().
BOOL CInccHzGbIn::ConvMain(UCHAR tc)
{
    if (m_fGBMode) {
        if (tc < 0x20 || tc > 0x7e)
            return Output(tc);

        // Hold the byte back until its partner arrives.
        m_pfnConv    = &CInccHzGbIn::ConvDoubleByte;
        m_pfnCleanUp = &CInccHzGbIn::CleanUpDoubleByte;
        m_tcLeadByte = tc;
        return TRUE;
    }

    if (tc != '~')
        return Output(tc);

    // A tilde is pending; the next byte decides what it means.
    m_pfnConv    = &CInccHzGbIn::ConvTilde;
    m_pfnCleanUp = &CInccHzGbIn::CleanUpTilde;
    m_nESCBytes  = 1;
    return TRUE;
}
/******************************************************************************
************************ C L E A N U P M A I N ************************
******************************************************************************/
// Clean-up handler for the main state: nothing is pending, so there is
// nothing to flush. Always returns TRUE.
BOOL CInccHzGbIn::CleanUpMain()
{
    return TRUE;
}
/******************************************************************************
*************************** C O N V T I L D E ***************************
******************************************************************************/
// State handler for the byte that follows a '~' seen outside GB mode.
//
//   "~~"   -> emits a single '~'
//   "~{"   -> switches GB mode on, emits nothing
//   "~\n"  -> swallowed, emits nothing
//   other  -> emits the held '~' and then processes tc again through
//             ConvertChar() as an ordinary byte
//
// The decoder always goes back to the main state before tc is examined.
BOOL CInccHzGbIn::ConvTilde(UCHAR tc)
{
    m_nESCBytes  = 0;
    m_pfnCleanUp = &CInccHzGbIn::CleanUpMain;
    m_pfnConv    = &CInccHzGbIn::ConvMain;

    if (tc == '~')
        return Output('~');

    if (tc == '{') {
        m_fGBMode = TRUE;
        return TRUE;
    }

    if (tc == '\n')
        return TRUE; // Just eat it

    // Not a recognized escape: the tilde was literal. Its Output() result is
    // intentionally ignored; only the re-processed byte decides the result.
    (void)Output('~');
    return SUCCEEDED(ConvertChar(tc)) ? TRUE : FALSE;
}
/******************************************************************************
*********************** C L E A N U P T I L D E ***********************
******************************************************************************/
// Clean-up handler used while a lone '~' is pending (input ended right after
// a tilde). The held tilde is emitted as a literal '~' and the decoder goes
// back to the main state.
//
// The pending-escape counter is cleared as well: the tilde has been flushed,
// so GetUnconvertBytes() must no longer report it as unconverted. ConvTilde()
// and Reset() clear the counter in the same way.
//
// Returns the result of Output().
BOOL CInccHzGbIn::CleanUpTilde()
{
    m_pfnConv    = &CInccHzGbIn::ConvMain;
    m_pfnCleanUp = &CInccHzGbIn::CleanUpMain;
    m_nESCBytes  = 0;
    return Output('~');
}
/******************************************************************************
********************* C O N V D O U B L E B Y T E *********************
******************************************************************************/
// State handler for the second byte of a pair seen in GB mode. The first
// byte is in m_tcLeadByte. The decoder always goes back to the main state,
// and the held lead byte is cleared before returning.
//
// Cases, tested in this order:
//   1. lead 0x21..0x77, trail 0x21..0x7e : both bytes are emitted with the
//      high bit set.
//   2. "~}"                              : GB mode is switched off, nothing
//      is emitted.
//   3. lead 0x78..0x7d, trail 0x21..0x7e : non-standard extended code; the
//      fixed pair 0xa1 0xf5 is emitted instead (blank box symbol).
//   4. lead '~', any other trail         : '~' and the trail byte are
//      emitted unchanged.
//   5. lead ' '                          : only the trail byte is emitted.
//   6. trail ' '                         : the fixed pair 0xa1 0xa1 is
//      emitted (space symbol).
//   7. anything else                     : both bytes are emitted unchanged.
//
// Returns the result of the last Output() call (TRUE for case 2). The result
// of the first Output() call of a pair is intentionally ignored.
BOOL CInccHzGbIn::ConvDoubleByte(UCHAR tc)
{
    const BOOL fTrailInRange = (tc >= 0x21 && tc <= 0x7e);
    BOOL fResult;

    m_pfnCleanUp = &CInccHzGbIn::CleanUpMain;
    m_pfnConv    = &CInccHzGbIn::ConvMain;

    if (fTrailInRange && m_tcLeadByte >= 0x21 && m_tcLeadByte <= 0x77) {
        // Regular GB character
        (void)Output(m_tcLeadByte | 0x80);
        fResult = Output(tc | 0x80);
    } else if (m_tcLeadByte == '~' && tc == '}') {
        // 0x7e7d: end of GB mode
        m_fGBMode = FALSE;
        fResult = TRUE;
    } else if (fTrailInRange && m_tcLeadByte >= 0x78 && m_tcLeadByte <= 0x7d) {
        // Non-standard extended code: output blank box symbol
        (void)Output((UCHAR)0xa1);
        fResult = Output((UCHAR)0xf5);
    } else if (m_tcLeadByte == '~') {
        // Tilde followed by something other than '}': pass both through
        (void)Output('~');
        fResult = Output(tc);
    } else if (m_tcLeadByte == ' ') {
        fResult = Output(tc);
    } else if (tc == ' ') {
        // Output space symbol
        (void)Output((UCHAR)0xa1);
        fResult = Output((UCHAR)0xa1);
    } else {
        (void)Output(m_tcLeadByte);
        fResult = Output(tc);
    }

    m_tcLeadByte = 0;
    return fResult;
}
/******************************************************************************
***************** C L E A N U P D O U B L E B Y T E *****************
******************************************************************************/
// Clean-up handler used while the first byte of a GB pair is pending (input
// ended in the middle of a pair). The held byte is emitted unchanged and the
// decoder goes back to the main state.
//
// The held byte is cleared once it has been flushed, so GetUnconvertBytes()
// no longer reports it as unconverted. ConvDoubleByte() clears it in the same
// way after consuming a pair.
//
// Returns the result of Output().
BOOL CInccHzGbIn::CleanUpDoubleByte()
{
    m_pfnConv    = &CInccHzGbIn::ConvMain;
    m_pfnCleanUp = &CInccHzGbIn::CleanUpMain;

    const BOOL fDone = Output(m_tcLeadByte);
    m_tcLeadByte = 0;
    return fDone;
}
// Reports how many source bytes are being held back: 1 when either a GB lead
// byte or an escape byte is pending, 0 otherwise.
int CInccHzGbIn::GetUnconvertBytes()
{
    return (m_tcLeadByte || m_nESCBytes) ? 1 : 0;
}
// Returns the decoder mode as a DWORD: 1 while in GB mode, 0 otherwise.
DWORD CInccHzGbIn::GetConvertMode()
{
    if (m_fGBMode)
        return 1;
    return 0;
}
// Resets the decoder and then restores GB mode from bit 0 of `mode`
// (the value produced by GetConvertMode()).
void CInccHzGbIn::SetConvertMode(DWORD mode)
{
    Reset();
    m_fGBMode = (mode & 0x01) ? TRUE : FALSE;
}
// ============================================================================
// Internet Character Set Conversion: Output to HZ-GB-2312
// ============================================================================
/******************************************************************************
************************** C O N S T R U C T O R **************************
******************************************************************************/
// Builds an HZ-GB-2312 output converter.
//
//   uCodePage, nCodeSet - passed straight to the CINetCodeConverter base.
//   dwFlag              - conversion flags; ConvertChar() tests the
//                         MLCONVCHARF_* bits in it.
//   lpFallBack          - optional fallback character; only the pointer is
//                         stored, and ConvertChar() reads one WCHAR from it.
CInccHzGbOut::CInccHzGbOut(UINT uCodePage, int nCodeSet, DWORD dwFlag, WCHAR * lpFallBack)
    : CINetCodeConverter(uCodePage, nCodeSet)
{
    _lpFallBack = lpFallBack;
    _dwFlag = dwFlag;
    Reset();
}
/******************************************************************************
******************************* R E S E T *********************************
******************************************************************************/
// Returns the encoder to its start state: no lead byte held, not in the
// middle of a double-byte character, GB mode off.
void CInccHzGbOut::Reset()
{
    m_tcLeadByte  = 0;
    m_fGBMode     = FALSE;
    m_fDoubleByte = FALSE;
}
/******************************************************************************
************************* C O N V E R T C H A R *************************
******************************************************************************/
// Encodes one source byte to HZ-GB-2312.
//
// Bytes arrive one at a time, so a double-byte character takes two calls:
// the first call only remembers the lead byte, the second emits the pair.
//
//   tc      - next source byte.
//   cchSrc  - not used by this converter.
//
// Returns
//   S_OK    - byte recorded or emitted normally (including NCR output),
//   S_FALSE - a pair outside the HZ range was replaced by the default
//             character,
//   E_FAIL  - the last Output() call made for this byte reported failure.
// Results of the earlier Output() calls made for the same byte are ignored.
HRESULT CInccHzGbOut::ConvertChar(UCHAR tc, int cchSrc)
{
    if (!m_fDoubleByte)
    {
        //
        // We're not using IsDBCSLeadByteEx() due to perf. concern
        // We should assert that our hard code table match IsDBCSLeadByteEx(),
        // But, MLang ships with down level platforms and assert won't be valid if there is a range change
        //
        if (IS_CHS_LEADBYTE(tc))
        {
            // First half of a pair: hold it until the trail byte arrives.
            m_tcLeadByte = tc;
            m_fDoubleByte = TRUE;
            return S_OK;
        }

        // Single-byte character: close GB mode first if it is open.
        if (m_fGBMode)
        {
            Output('~');
            Output('}');
            m_fGBMode = FALSE;
        }

        // tilde should be encoded as two tildes
        if (tc == '~')
            Output('~');

        return Output(tc) ? S_OK : E_FAIL;
    }

    // Second half of a pair.
    m_fDoubleByte = FALSE;

    HRESULT hr = S_OK;
    BOOL fDone = TRUE;

    // a-ehuang: Bug# 31726, send all out of range code to convert to NCR
    // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e
    const BOOL fLeadInRange  = (m_tcLeadByte >= 0xa1 && m_tcLeadByte <= 0xf7);
    const BOOL fTrailInRange = (tc >= 0xa1 && tc <= 0xfe);

    if (fLeadInRange && fTrailInRange)
    {
        // Representable in HZ: open GB mode if needed, then emit both bytes
        // with the high bit stripped.
        if (!m_fGBMode)
        {
            Output('~');
            Output('{');
            m_fGBMode = TRUE;
        }
        Output(m_tcLeadByte & 0x7f);
        fDone = Output(tc & 0x7f);
    }
    else
    {
        // Work out the replacement character: '?' unless the caller supplied
        // a fallback that converts to exactly one byte.
        UCHAR szDefaultChar[3] = {0x3f}; // possible DBCS + null
        if (_lpFallBack && (_dwFlag & MLCONVCHARF_USEDEFCHAR))
        {
            // only take SBCS, no DBCS character
            const int cbFallBack = WideCharToMultiByte(CP_CHN_GB, 0,
                                       (LPCWSTR)_lpFallBack, 1,
                                       (LPSTR)szDefaultChar, ARRAYSIZE(szDefaultChar), NULL, NULL);
            if (cbFallBack != 1)
                szDefaultChar[0] = 0x3f;
        }

        // End Escape sequence for NCR entity output
        if (m_fGBMode)
        {
            Output('~');
            Output('}');
            m_fGBMode = FALSE;
        }

        BOOL fEntitized = FALSE;
        if (_dwFlag & (MLCONVCHARF_NCR_ENTITIZE|MLCONVCHARF_NAME_ENTITIZE))
        {
            char szChar[2];
            WCHAR szwChar[2];

            szChar[0] = m_tcLeadByte;
            szChar[1] = tc;

            if (MultiByteToWideChar(CP_CHN_GB, 0, szChar, 2, szwChar, ARRAYSIZE(szwChar)))
            {
                // Output NCR entity: "&#" + decimal code of the first
                // converted WCHAR + ";"
                char szDstStr[10];

                Output('&');
                Output('#');
                _ultoa((unsigned long)szwChar[0], (char*)szDstStr, 10);
                for (const char *pchDigit = szDstStr; *pchDigit; ++pchDigit)
                {
                    Output(*pchDigit);
                }
                fDone = Output(';');
                fEntitized = TRUE;
            }
        }

        if (!fEntitized)
        {
            // Entities not requested, or the pair could not be converted:
            // substitute the default character and flag the loss.
            fDone = Output(szDefaultChar[0]);
            hr = S_FALSE;
        }
    }

    m_tcLeadByte = 0;

    return fDone ? hr : E_FAIL;
}
/******************************************************************************
***************************** C L E A N U P *****************************
******************************************************************************/
// Finishes the output stream. If GB mode is still open, it is closed by
// emitting "~}"; otherwise nothing is emitted.
//
// Returns TRUE when nothing had to be emitted, otherwise the result of the
// Output() call for '}' (the result for '~' is intentionally ignored).
BOOL CInccHzGbOut::CleanUp()
{
    if (!m_fGBMode)
        return TRUE;

    m_fGBMode = FALSE;
    (void)Output('~');
    return Output('}');
}
// Reports how many source bytes are being held back: 1 while a lead byte is
// waiting for its trail byte, 0 otherwise.
int CInccHzGbOut::GetUnconvertBytes()
{
    return m_tcLeadByte ? 1 : 0;
}
// The encoder exposes no mode to its caller; the reported mode is always 0.
DWORD CInccHzGbOut::GetConvertMode()
{
    return 0;
}
// Puts the encoder back into its start state. The `mode` value is ignored.
void CInccHzGbOut::SetConvertMode(DWORD mode)
{
    Reset();
}