390 lines
11 KiB
C++
390 lines
11 KiB
C++
|
// ============================================================================
|
||
|
// Internet Character Set Conversion: Input from HZ-GB-2312
|
||
|
// ============================================================================
|
||
|
|
||
|
#include "private.h"
|
||
|
#include "fechrcnv.h"
|
||
|
#include "hzgbobj.h"
|
||
|
#include "codepage.h"
|
||
|
|
||
|
/******************************************************************************
|
||
|
************************** C O N S T R U C T O R **************************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Constructs the HZ-GB-2312 input converter. The code page and code set are
// forwarded to the base converter; decoder state is initialized via Reset().
CInccHzGbIn::CInccHzGbIn(UINT uCodePage, int nCodeSet)
    : CINetCodeConverter(uCodePage, nCodeSet)
{
    Reset();
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
******************************* R E S E T *********************************
|
||
|
******************************************************************************/
|
||
|
|
||
|
void CInccHzGbIn::Reset()
|
||
|
{
|
||
|
m_pfnConv = ConvMain;
|
||
|
m_pfnCleanUp = CleanUpMain;
|
||
|
m_fGBMode = FALSE;
|
||
|
m_tcLeadByte = 0 ;
|
||
|
m_nESCBytes = 0 ;
|
||
|
return ;
|
||
|
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
************************* C O N V E R T C H A R *************************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Feeds one input byte to the handler for the current decoder state.
//   tc     - the input byte.
//   cchSrc - not used by this implementation.
// Returns S_OK when the handler reports success, E_FAIL otherwise.
HRESULT CInccHzGbIn::ConvertChar(UCHAR tc, int cchSrc)
{
    return (this->*m_pfnConv)(tc) ? S_OK : E_FAIL;
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
***************************** C L E A N U P *****************************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Runs the clean-up handler that belongs to the current decoder state and
// returns its result.
BOOL CInccHzGbIn::CleanUp()
{
    const BOOL fCleaned = (this->*m_pfnCleanUp)();
    return fCleaned;
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
**************************** C O N V M A I N ****************************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Main-state handler: processes one byte when no escape or lead byte is pending.
//   ASCII mode: '~' starts an escape sequence (resolved by ConvTilde); any
//               other byte is passed straight to Output().
//   GB mode:    a byte in 0x20-0x7e is held as the first byte of a pair
//               (resolved by ConvDoubleByte); any other byte is passed
//               straight to Output().
// Returns TRUE when the byte was only buffered, otherwise the result of Output().
BOOL CInccHzGbIn::ConvMain(UCHAR tc)
{
    if (m_fGBMode) {
        if (tc >= 0x20 && tc <= 0x7e) {
            // Standard '&Class::member' syntax is used to form the member pointers.
            m_pfnConv = &CInccHzGbIn::ConvDoubleByte;
            m_pfnCleanUp = &CInccHzGbIn::CleanUpDoubleByte;
            m_tcLeadByte = tc;
            return TRUE;
        }
        return Output(tc);
    }

    if (tc == '~') {
        m_pfnConv = &CInccHzGbIn::ConvTilde;
        m_pfnCleanUp = &CInccHzGbIn::CleanUpTilde;
        m_nESCBytes = 1;    // one escape byte is now pending
        return TRUE;
    }

    return Output(tc);
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
************************ C L E A N U P M A I N ************************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Clean-up handler for the main state: performs no output and always
// reports success.
BOOL CInccHzGbIn::CleanUpMain()
{
    return TRUE;
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
*************************** C O N V T I L D E ***************************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Handler for the byte that follows a '~' seen in ASCII mode.
//   "~~"  -> emits a single '~'
//   "~{"  -> switches to GB mode, emits nothing
//   "~\n" -> emits nothing (the pair is swallowed)
//   other -> emits the held '~', then re-processes tc through ConvertChar()
// The main-state handlers are restored before tc is examined, so the
// re-processing in the default case runs in the main state.
BOOL CInccHzGbIn::ConvTilde(UCHAR tc)
{
    // Standard '&Class::member' syntax is used to form the member pointers.
    m_pfnConv = &CInccHzGbIn::ConvMain;
    m_pfnCleanUp = &CInccHzGbIn::CleanUpMain;

    m_nESCBytes = 0;    // the escape byte is no longer pending

    switch (tc) {
    case '~':
        return Output('~');

    case '{':
        m_fGBMode = TRUE;
        return TRUE;

    case '\n':
        return TRUE;    // Just eat it

    default:
        (void)Output('~');
        return SUCCEEDED(ConvertChar(tc)) ? TRUE : FALSE;
    }
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
*********************** C L E A N U P T I L D E ***********************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Clean-up handler used while a '~' is pending: restores the main-state
// handlers and emits the held '~' as a literal character.
// Returns the result of Output().
BOOL CInccHzGbIn::CleanUpTilde()
{
    // Standard '&Class::member' syntax is used to form the member pointers.
    m_pfnConv = &CInccHzGbIn::ConvMain;
    m_pfnCleanUp = &CInccHzGbIn::CleanUpMain;

    return Output('~');
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
********************* C O N V D O U B L E B Y T E *********************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Handler for the second byte of a pair in GB mode; m_tcLeadByte holds the
// first byte. Restores the main-state handlers, emits the decoded result and
// clears the pending lead byte.
// Returns the result of the last Output() call, or TRUE when the pair was
// the "~}" escape that leaves GB mode.
BOOL CInccHzGbIn::ConvDoubleByte(UCHAR tc)
{
    BOOL fRet;
    const BOOL fTrailInRange = (tc >= 0x21 && tc <= 0x7e);

    // Standard '&Class::member' syntax is used to form the member pointers.
    m_pfnConv = &CInccHzGbIn::ConvMain;
    m_pfnCleanUp = &CInccHzGbIn::CleanUpMain;

    if (m_tcLeadByte >= 0x21 && m_tcLeadByte <= 0x77 && fTrailInRange) {
        // GB character: set the high bit on both bytes.
        (void)Output(m_tcLeadByte | 0x80);
        fRet = Output(tc | 0x80);
    } else if (m_tcLeadByte == '~' && tc == '}') {
        // 0x7e7d: escape sequence that leaves GB mode.
        m_fGBMode = FALSE;
        fRet = TRUE;
    } else if (m_tcLeadByte >= 0x78 && m_tcLeadByte <= 0x7d && fTrailInRange) {
        // Non-standard extended code: output blank box symbol.
        (void)Output((UCHAR)0xa1);
        fRet = Output((UCHAR)0xf5);
    } else if (m_tcLeadByte == '~') {
        // '~' followed by something other than '}': pass both bytes through.
        (void)Output('~');
        fRet = Output(tc);
    } else if (m_tcLeadByte == ' ') {
        // Leading space: only the second byte is emitted.
        fRet = Output(tc);
    } else if (tc == ' ') {
        // Trailing space: output space symbol.
        (void)Output((UCHAR)0xa1);
        fRet = Output((UCHAR)0xa1);
    } else {
        // Anything else is passed through unchanged.
        (void)Output(m_tcLeadByte);
        fRet = Output(tc);
    }

    m_tcLeadByte = 0;
    return fRet;
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
***************** C L E A N U P D O U B L E B Y T E *****************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Clean-up handler used while a lead byte is pending in GB mode: restores
// the main-state handlers and emits the held lead byte unchanged.
// Returns the result of Output().
BOOL CInccHzGbIn::CleanUpDoubleByte()
{
    // Standard '&Class::member' syntax is used to form the member pointers.
    m_pfnConv = &CInccHzGbIn::ConvMain;
    m_pfnCleanUp = &CInccHzGbIn::CleanUpMain;

    return Output(m_tcLeadByte);
}
|
||
|
|
||
|
int CInccHzGbIn::GetUnconvertBytes()
|
||
|
{
|
||
|
if (m_tcLeadByte)
|
||
|
return 1;
|
||
|
else if ( m_nESCBytes )
|
||
|
return 1;
|
||
|
else
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// Reports the decoder mode: 1 while in GB mode, 0 otherwise.
DWORD CInccHzGbIn::GetConvertMode()
{
    if (m_fGBMode)
        return 1;

    return 0;
}
|
||
|
|
||
|
// Resets the decoder, then enables GB mode if bit 0 of 'mode' is set and
// disables it otherwise.
void CInccHzGbIn::SetConvertMode(DWORD mode)
{
    Reset();
    m_fGBMode = (mode & 0x01) ? TRUE : FALSE;
}
|
||
|
|
||
|
// ============================================================================
|
||
|
// Internet Character Set Conversion: Output to HZ-GB-2312
|
||
|
// ============================================================================
|
||
|
|
||
|
/******************************************************************************
|
||
|
************************** C O N S T R U C T O R **************************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Constructs the HZ-GB-2312 output converter. The code page and code set are
// forwarded to the base converter; the conversion flags and the fallback
// character pointer are stored for use by ConvertChar().
CInccHzGbOut::CInccHzGbOut(UINT uCodePage, int nCodeSet, DWORD dwFlag, WCHAR * lpFallBack)
    : CINetCodeConverter(uCodePage, nCodeSet)
{
    _dwFlag = dwFlag;
    _lpFallBack = lpFallBack;

    Reset();    // encoder state; does not touch the two members set above
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
******************************* R E S E T *********************************
|
||
|
******************************************************************************/
|
||
|
void CInccHzGbOut::Reset()
|
||
|
{
|
||
|
m_fDoubleByte = FALSE;
|
||
|
m_fGBMode = FALSE;
|
||
|
m_tcLeadByte = 0 ;
|
||
|
return ;
|
||
|
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
************************* C O N V E R T C H A R *************************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Encodes one GB code-page byte into HZ-GB-2312.
//   tc     - the input byte.
//   cchSrc - not used by this implementation.
// A lead byte is only buffered. When its trail byte arrives the pair is
// written in GB mode with the high bits cleared, provided it lies inside the
// HZ range; otherwise it is replaced by an NCR entity or a default character.
// Single bytes are written in ASCII mode, with '~' doubled.
// Returns S_OK on success, S_FALSE when a default character was substituted,
// and E_FAIL when the last Output() call failed.
HRESULT CInccHzGbOut::ConvertChar(UCHAR tc, int cchSrc)
{
    HRESULT hrResult = S_OK;
    BOOL fWritten = TRUE;

    if (m_fDoubleByte)
    {
        // tc is the trail byte of the pair started by m_tcLeadByte.
        m_fDoubleByte = FALSE;

        // a-ehuang: Bug# 31726, send all out of range code to convert to NCR
        // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e
        const BOOL fInHzRange = (m_tcLeadByte >= 0xa1 && m_tcLeadByte <= 0xf7) &&
                                (tc >= 0xa1 && tc <= 0xfe);

        if (fInHzRange)
        {
            // Enter GB mode if necessary, then write the pair as 7-bit bytes.
            if (!m_fGBMode)
            {
                Output('~');
                Output('{');
                m_fGBMode = TRUE;
            }

            Output(m_tcLeadByte & 0x7f);
            fWritten = Output(tc & 0x7f);
        }
        else
        {
            UCHAR szDefaultChar[3] = {0x3f}; // possible DBCS + null

            if (_lpFallBack && (_dwFlag & MLCONVCHARF_USEDEFCHAR))
            {
                // only take SBCS, no DBCS character
                if ( 1 != WideCharToMultiByte(CP_CHN_GB, 0,
                                              (LPCWSTR)_lpFallBack, 1,
                                              (LPSTR)szDefaultChar, ARRAYSIZE(szDefaultChar), NULL, NULL ))
                    szDefaultChar[0] = 0x3f;
            }

            // The replacement is written in ASCII mode, so leave GB mode first.
            if (m_fGBMode)
            {
                Output('~');
                Output('}');
                m_fGBMode = FALSE;
            }

            BOOL fEntitized = FALSE;

            if (_dwFlag & (MLCONVCHARF_NCR_ENTITIZE|MLCONVCHARF_NAME_ENTITIZE))
            {
                char szChar[2];
                char szDstStr[10];
                WCHAR szwChar[2];

                szChar[0] = m_tcLeadByte;
                szChar[1] = tc;

                if (MultiByteToWideChar(CP_CHN_GB, 0, szChar, 2, szwChar, ARRAYSIZE(szwChar)))
                {
                    // Output NCR entity: "&#" + decimal code of the first wide char + ";"
                    Output('&');
                    Output('#');
                    _ultoa((unsigned long)szwChar[0], (char*)szDstStr, 10);

                    const int cDigits = lstrlenA(szDstStr);
                    for (int iDigit = 0; iDigit < cDigits; ++iDigit)
                    {
                        Output(szDstStr[iDigit]);
                    }

                    fWritten = Output(';');
                    fEntitized = TRUE;
                }
            }

            // Entities not requested, or the pair could not be converted:
            // substitute the default character and report S_FALSE.
            if (!fEntitized)
            {
                fWritten = Output(szDefaultChar[0]);
                hrResult = S_FALSE;
            }
        }

        m_tcLeadByte = 0;
    }
    else if (IS_CHS_LEADBYTE(tc))
    {
        //
        // We're not using IsDBCSLeadByteEx() due to perf. concern
        // We should assert that our hard code table match IsDBCSLeadByteEx(),
        // But, MLang ships with down level platforms and assert won't be valid if there is a range change
        //
        m_fDoubleByte = TRUE;
        m_tcLeadByte = tc;
    }
    else
    {
        // Single byte: leave GB mode if it is active.
        if (m_fGBMode)
        {
            Output('~');
            Output('}');
            m_fGBMode = FALSE;
        }

        // tilde should be encoded as two tildes
        if (tc == '~')
            Output('~');

        fWritten = Output(tc);
    }

    return fWritten ? hrResult : E_FAIL;
}
|
||
|
|
||
|
/******************************************************************************
|
||
|
***************************** C L E A N U P *****************************
|
||
|
******************************************************************************/
|
||
|
|
||
|
// Finishes the output stream: if the encoder is still in GB mode, clears the
// mode flag and writes the closing "~}" escape sequence.
// Returns TRUE when nothing had to be written, otherwise the result of the
// final Output() call.
BOOL CInccHzGbOut::CleanUp()
{
    if (m_fGBMode) {
        m_fGBMode = FALSE;
        (void)Output('~');
        return Output('}');
    }

    return TRUE;
}
|
||
|
|
||
|
int CInccHzGbOut::GetUnconvertBytes()
|
||
|
{
|
||
|
if (m_tcLeadByte)
|
||
|
return 1;
|
||
|
else
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// Always reports mode 0, regardless of the encoder's current state.
DWORD CInccHzGbOut::GetConvertMode()
{
    return 0;
}
|
||
|
|
||
|
// Ignores the requested mode value and simply resets the encoder state.
void CInccHzGbOut::SetConvertMode(DWORD mode)
{
    Reset();
}
|
||
|
|