// ============================================================================ // Internet Character Set Conversion: Input from HZ-GB-2312 // ============================================================================ #include "private.h" #include "fechrcnv.h" #include "hzgbobj.h" #include "codepage.h" /****************************************************************************** ************************** C O N S T R U C T O R ************************** ******************************************************************************/ CInccHzGbIn::CInccHzGbIn(UINT uCodePage, int nCodeSet) : CINetCodeConverter(uCodePage, nCodeSet) { Reset(); // initialization return ; } /****************************************************************************** ******************************* R E S E T ********************************* ******************************************************************************/ void CInccHzGbIn::Reset() { m_pfnConv = ConvMain; m_pfnCleanUp = CleanUpMain; m_fGBMode = FALSE; m_tcLeadByte = 0 ; m_nESCBytes = 0 ; return ; } /****************************************************************************** ************************* C O N V E R T C H A R ************************* ******************************************************************************/ HRESULT CInccHzGbIn::ConvertChar(UCHAR tc, int cchSrc) { BOOL fDone = (this->*m_pfnConv)(tc); if (fDone) return S_OK; else return E_FAIL; } /****************************************************************************** ***************************** C L E A N U P ***************************** ******************************************************************************/ BOOL CInccHzGbIn::CleanUp() { return (this->*m_pfnCleanUp)(); } /****************************************************************************** **************************** C O N V M A I N **************************** ******************************************************************************/ BOOL CInccHzGbIn::ConvMain(UCHAR tc) { BOOL fDone = TRUE; if (!m_fGBMode) { if (tc == '~') { m_pfnConv = ConvTilde; m_pfnCleanUp = CleanUpTilde; m_nESCBytes = 1 ; } else { fDone = Output(tc); } } else { if (tc >= 0x20 && tc <= 0x7e) { m_pfnConv = ConvDoubleByte; m_pfnCleanUp = CleanUpDoubleByte; m_tcLeadByte = tc; } else { fDone = Output(tc); } } return fDone; } /****************************************************************************** ************************ C L E A N U P M A I N ************************ ******************************************************************************/ BOOL CInccHzGbIn::CleanUpMain() { return TRUE; } /****************************************************************************** *************************** C O N V T I L D E *************************** ******************************************************************************/ BOOL CInccHzGbIn::ConvTilde(UCHAR tc) { m_pfnConv = ConvMain; m_pfnCleanUp = CleanUpMain; m_nESCBytes = 0 ; switch (tc) { case '~': return Output('~'); case '{': m_fGBMode = TRUE; return TRUE; case '\n': return TRUE; // Just eat it default: (void)Output('~'); if (SUCCEEDED(ConvertChar(tc))) return TRUE; else return FALSE; } } /****************************************************************************** *********************** C L E A N U P T I L D E *********************** ******************************************************************************/ BOOL CInccHzGbIn::CleanUpTilde() { m_pfnConv = ConvMain; m_pfnCleanUp = CleanUpMain; return Output('~'); } /****************************************************************************** ********************* C O N V D O U B L E B Y T E ********************* ******************************************************************************/ BOOL CInccHzGbIn::ConvDoubleByte(UCHAR tc) { BOOL fRet ; m_pfnConv = ConvMain; m_pfnCleanUp = CleanUpMain; if (m_tcLeadByte >= 0x21 && m_tcLeadByte <= 0x77 && tc >= 0x21 && tc <= 0x7e) { // Check if GB char (void)Output(m_tcLeadByte | 0x80); fRet = Output(tc | 0x80); } else if (m_tcLeadByte == '~' && tc == '}') { // 0x7e7d m_fGBMode = FALSE; fRet = TRUE; } else if (m_tcLeadByte >= 0x78 && m_tcLeadByte <= 0x7d && tc >= 0x21 && tc <= 0x7e) { // Check if non standard extended code (void)Output((UCHAR)0xa1); // Output blank box symbol fRet = Output((UCHAR)0xf5); } else if (m_tcLeadByte == '~') { (void)Output('~'); // Output blank box symbol fRet = Output(tc); } else if (m_tcLeadByte == ' ') { fRet = Output(tc); } else if (tc == ' ') { (void)Output((UCHAR)0xa1); // Output space symbol fRet = Output((UCHAR)0xa1); } else { (void)Output(m_tcLeadByte); fRet = Output(tc); } m_tcLeadByte = 0 ; return fRet ; } /****************************************************************************** ***************** C L E A N U P D O U B L E B Y T E ***************** ******************************************************************************/ BOOL CInccHzGbIn::CleanUpDoubleByte() { m_pfnConv = ConvMain; m_pfnCleanUp = CleanUpMain; return Output(m_tcLeadByte); } int CInccHzGbIn::GetUnconvertBytes() { if (m_tcLeadByte) return 1; else if ( m_nESCBytes ) return 1; else return 0; } DWORD CInccHzGbIn::GetConvertMode() { return ( m_fGBMode ? 1 : 0 ) ; } void CInccHzGbIn::SetConvertMode(DWORD mode) { Reset(); // initialization if ( mode & 0x01 ) m_fGBMode = TRUE ; else m_fGBMode = FALSE ; return ; } // ============================================================================ // Internet Character Set Conversion: Output to HZ-GB-2312 // ============================================================================ /****************************************************************************** ************************** C O N S T R U C T O R ************************** ******************************************************************************/ CInccHzGbOut::CInccHzGbOut(UINT uCodePage, int nCodeSet, DWORD dwFlag, WCHAR * lpFallBack) : CINetCodeConverter(uCodePage, nCodeSet) { Reset(); // initialization _dwFlag = dwFlag; _lpFallBack = lpFallBack; return ; } /****************************************************************************** ******************************* R E S E T ********************************* ******************************************************************************/ void CInccHzGbOut::Reset() { m_fDoubleByte = FALSE; m_fGBMode = FALSE; m_tcLeadByte = 0 ; return ; } /****************************************************************************** ************************* C O N V E R T C H A R ************************* ******************************************************************************/ HRESULT CInccHzGbOut::ConvertChar(UCHAR tc, int cchSrc) { BOOL fDone = TRUE; HRESULT hr = S_OK; if (!m_fDoubleByte) { // // We're not using IsDBCSLeadByteEx() due to perf. concern // We should assert that our hard code table match IsDBCSLeadByteEx(), // But, MLang ships with down level platforms and assert won't be valid if there is a range change // if (IS_CHS_LEADBYTE(tc)) { m_fDoubleByte = TRUE; m_tcLeadByte = tc; } else { if (m_fGBMode) { Output('~'); fDone = Output('}'); m_fGBMode = FALSE; } // tilde should be encoded as two tildes if (tc == '~') Output('~'); fDone = Output(tc); } } else { m_fDoubleByte = FALSE; // a-ehuang: Bug# 31726, send all out of range code to convert to NCR // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e if ( (m_tcLeadByte < 0xa1 || m_tcLeadByte > 0xf7) || (tc < 0xa1 || tc > 0xfe) ) // end-31726 { UCHAR szDefaultChar[3] = {0x3f}; // possible DBCS + null if (_lpFallBack && (_dwFlag & MLCONVCHARF_USEDEFCHAR)) { // only take SBCS, no DBCS character if ( 1 != WideCharToMultiByte(CP_CHN_GB, 0, (LPCWSTR)_lpFallBack, 1, (LPSTR)szDefaultChar, ARRAYSIZE(szDefaultChar), NULL, NULL )) szDefaultChar[0] = 0x3f; } // End Escape sequence for NCR entity output if (m_fGBMode) { Output('~'); Output('}'); m_fGBMode = FALSE; } if (_dwFlag & (MLCONVCHARF_NCR_ENTITIZE|MLCONVCHARF_NAME_ENTITIZE)) { char szChar[2]; char szDstStr[10]; WCHAR szwChar[2]; int cCount; szChar[0] = m_tcLeadByte; szChar[1] = tc; if (MultiByteToWideChar(CP_CHN_GB, 0, szChar, 2, szwChar, ARRAYSIZE(szwChar))) { // Output NCR entity Output('&'); Output('#'); _ultoa((unsigned long)szwChar[0], (char*)szDstStr, 10); cCount = lstrlenA(szDstStr); for (int i=0; i< cCount; i++) { Output(szDstStr[i]); } fDone = Output(';'); } else { fDone = Output(szDefaultChar[0]); hr = S_FALSE; } } else { fDone = Output(szDefaultChar[0]); hr = S_FALSE; } } else { if (!m_fGBMode) { Output('~'); Output('{'); m_fGBMode = TRUE; } Output(m_tcLeadByte & 0x7f); fDone = Output(tc & 0x7f); } m_tcLeadByte = 0 ; } if (!fDone) hr = E_FAIL; return hr; } /****************************************************************************** ***************************** C L E A N U P ***************************** ******************************************************************************/ BOOL CInccHzGbOut::CleanUp() { if (!m_fGBMode) { return TRUE; } else { m_fGBMode = FALSE ; (void)Output('~'); return Output('}'); } } int CInccHzGbOut::GetUnconvertBytes() { if (m_tcLeadByte) return 1; else return 0; } DWORD CInccHzGbOut::GetConvertMode() { return 0 ; } void CInccHzGbOut::SetConvertMode(DWORD mode) { Reset(); // initialization return ; }