windows-nt/Source/XPSP1/NT/shell/ext/mlang/kscobj.cpp

461 lines
13 KiB
C++
Raw Normal View History

2020-09-26 03:20:57 -05:00
// ============================================================================
// Internet Character Set Conversion: Input from ISO-2022-KR
// ============================================================================
#include "private.h"
#include "fechrcnv.h"
#include "kscobj.h"
#include "codepage.h"
/******************************************************************************
************************** C O N S T R U C T O R **************************
******************************************************************************/
// Constructs the ISO-2022-KR input converter. The code page and code set are
// forwarded unchanged to the CINetCodeConverter base, after which the decoder
// state is put into its initial configuration via Reset().
CInccKscIn::CInccKscIn(UINT uCodePage, int nCodeSet)
    : CINetCodeConverter(uCodePage, nCodeSet)
{
    Reset();
}
/******************************************************************************
******************************* R E S E T *********************************
******************************************************************************/
// Puts the decoder back into its initial state: the main-state handlers are
// installed, shift state is off, and no escape bytes or lead byte are pending.
void CInccKscIn::Reset()
{
    // Nothing is buffered and we are not in the shifted (double-byte) state.
    m_nESCBytes = 0;
    m_fLeadByte = FALSE;
    m_fShift = FALSE;

    // Bug #57570: the Korean ISP DACOM labels only one part of a MIME mail
    // with the designator. To decode the remaining parts correctly we must
    // decode the ISO document or MIME message even when there is no
    // designator "esc ) C", so Korean mode is on from the start.
    m_fKorea = TRUE;

    // Start in the main state.
    m_pfnCleanUp = &CInccKscIn::CleanUpMain;
    m_pfnConv = &CInccKscIn::ConvMain;
}
/******************************************************************************
************************* C O N V E R T C H A R *************************
******************************************************************************/
// Feeds one input byte to the handler of the current parser state
// (m_pfnConv). cchSrc is not used by this implementation.
// Returns S_OK when the handler reports success and E_FAIL otherwise.
HRESULT CInccKscIn::ConvertChar(UCHAR tc, int cchSrc)
{
    return (this->*m_pfnConv)(tc) ? S_OK : E_FAIL;
}
/******************************************************************************
***************************** C L E A N U P *****************************
******************************************************************************/
// Runs the clean-up handler of the current parser state (m_pfnCleanUp) and
// passes its result straight back to the caller.
BOOL CInccKscIn::CleanUp()
{
    const BOOL fCleaned = (this->*m_pfnCleanUp)();
    return fCleaned;
}
/******************************************************************************
**************************** C O N V M A I N ****************************
******************************************************************************/
// Main-state handler: processes one input byte outside of an escape sequence.
//
//   ESC      - switch to the escape-parsing state and count the pending byte.
//   SO / SI  - (Korean mode only) enter / leave the shifted state; neither
//              byte is written to the output.
//   other    - while shifted, every byte except space, tab and newline is
//              emitted with the high bit set, and m_fLeadByte is toggled so
//              it is set after the first byte of a pair and clear after the
//              second; otherwise the byte is emitted unchanged.
//
// Returns the result of the Output() call, or TRUE when the byte produced no
// output.
BOOL CInccKscIn::ConvMain(UCHAR tc)
{
    if (tc == ESC) {
        // Possible start of a designator; the following bytes decide.
        m_pfnConv = &CInccKscIn::ConvEsc;
        m_pfnCleanUp = &CInccKscIn::CleanUpEsc;
        m_nESCBytes++;
        return TRUE;
    }

    if (!m_fKorea)
        return Output(tc);

    switch (tc) {
    case SO:
        m_fShift = TRUE;
        return TRUE;

    case SI:
        m_fShift = FALSE;
        m_fLeadByte = FALSE;   // a dangling lead byte is abandoned on shift-in
        return TRUE;

    default:
        break;
    }

    if (!m_fShift)
        return Output(tc);

    switch (tc) {
    case ' ':
    case '\t':
    case '\n':
        // These pass through untouched even while shifted.
        return Output(tc);

    default:
        break;
    }

    const BOOL fDone = Output(tc | 0x80);

    // Toggle with logical NOT. The previous bitwise "~" only flips a BOOL
    // between 0 and -1 and can never produce FALSE if the flag ever holds
    // TRUE (1); "!" is correct for any starting value.
    m_fLeadByte = !m_fLeadByte;

    return fDone;
}
/******************************************************************************
************************ C L E A N U P M A I N ************************
******************************************************************************/
// Clean-up handler for the main state: no escape bytes are being held back
// in this state, so there is nothing to flush and the call always succeeds.
BOOL CInccKscIn::CleanUpMain()
{
    return TRUE;
}
/******************************************************************************
***************************** C O N V E S C *****************************
******************************************************************************/
// Handler for the byte that follows ESC.
// If it is ISO2022_IN_CHAR the escape sequence continues in the next state.
// Otherwise the sequence is abandoned: the held-back ESC is written out and
// the current byte is re-processed in the main state.
BOOL CInccKscIn::ConvEsc(UCHAR tc)
{
    if (tc != ISO2022_IN_CHAR) {
        // Fall back to the main state before replaying, because
        // ConvertChar() dispatches through m_pfnConv.
        m_nESCBytes = 0;
        m_pfnCleanUp = &CInccKscIn::CleanUpMain;
        m_pfnConv = &CInccKscIn::ConvMain;

        (void)Output(ESC);
        return SUCCEEDED(ConvertChar(tc)) ? TRUE : FALSE;
    }

    // Sequence still matches; wait for the next byte.
    m_pfnConv = &CInccKscIn::ConvIsoIn;
    m_pfnCleanUp = &CInccKscIn::CleanUpIsoIn;
    m_nESCBytes++;
    return TRUE;
}
/******************************************************************************
************************* C L E A N U P E S C *************************
******************************************************************************/
// Clean-up handler used when input ends right after an ESC: returns to the
// main state and writes out the ESC that was being held back.
BOOL CInccKscIn::CleanUpEsc()
{
    m_nESCBytes = 0;
    m_pfnCleanUp = &CInccKscIn::CleanUpMain;
    m_pfnConv = &CInccKscIn::ConvMain;

    const BOOL fWritten = Output(ESC);
    return fWritten;
}
/******************************************************************************
************************** C O N V I S O I N **************************
******************************************************************************/
// Handler for the byte that follows "ESC ISO2022_IN_CHAR".
// If it is ISO2022_IN_KR_CHAR_1 the escape sequence continues. Otherwise the
// sequence is abandoned: the bytes consumed so far are replayed through the
// main state, followed by the current byte.
BOOL CInccKscIn::ConvIsoIn(UCHAR tc)
{
    if (tc != ISO2022_IN_KR_CHAR_1) {
        // Return to the main state first so the replayed bytes are handled
        // there (ConvertChar() dispatches through m_pfnConv).
        m_nESCBytes = 0;
        m_pfnCleanUp = &CInccKscIn::CleanUpMain;
        m_pfnConv = &CInccKscIn::ConvMain;

        (void)Output(ESC);
        (void)ConvertChar(ISO2022_IN_CHAR);
        return SUCCEEDED(ConvertChar(tc)) ? TRUE : FALSE;
    }

    // Sequence still matches; one more byte to check.
    m_pfnConv = &CInccKscIn::ConvIsoInKr;
    m_pfnCleanUp = &CInccKscIn::CleanUpIsoInKr;
    m_nESCBytes++;
    return TRUE;
}
/******************************************************************************
********************** C L E A N U P I S O I N **********************
******************************************************************************/
// Clean-up handler used when input ends after "ESC ISO2022_IN_CHAR":
// returns to the main state, writes the held-back ESC, replays
// ISO2022_IN_CHAR through the converter, and then runs whatever clean-up
// handler is installed afterwards.
BOOL CInccKscIn::CleanUpIsoIn()
{
    m_nESCBytes = 0;
    m_pfnCleanUp = &CInccKscIn::CleanUpMain;
    m_pfnConv = &CInccKscIn::ConvMain;

    (void)Output(ESC);
    (void)ConvertChar(ISO2022_IN_CHAR);

    const BOOL fCleaned = CleanUp();
    return fCleaned;
}
/******************************************************************************
*********************** C O N V I S O I N K R ***********************
******************************************************************************/
// Handler for the byte that follows "ESC ISO2022_IN_CHAR ISO2022_IN_KR_CHAR_1".
// Either way the parser goes back to the main state. If the byte is
// ISO2022_IN_KR_CHAR_2 the designator is complete and Korean mode is turned
// on; otherwise the bytes consumed so far are replayed, followed by the
// current byte.
BOOL CInccKscIn::ConvIsoInKr(UCHAR tc)
{
    // Leave the escape states before anything is replayed below.
    m_nESCBytes = 0;
    m_pfnCleanUp = &CInccKscIn::CleanUpMain;
    m_pfnConv = &CInccKscIn::ConvMain;

    if (tc != ISO2022_IN_KR_CHAR_2) {
        // Not the designator: emit what was swallowed, then this byte.
        (void)Output(ESC);
        (void)ConvertChar(ISO2022_IN_CHAR);
        (void)ConvertChar(ISO2022_IN_KR_CHAR_1);
        return SUCCEEDED(ConvertChar(tc)) ? TRUE : FALSE;
    }

    // Complete designator; it produces no output of its own.
    m_fKorea = TRUE;
    return TRUE;
}
/******************************************************************************
******************* C L E A N U P I S O I N K R *******************
******************************************************************************/
// Clean-up handler used when input ends after
// "ESC ISO2022_IN_CHAR ISO2022_IN_KR_CHAR_1": returns to the main state,
// writes the held-back ESC, replays the two consumed bytes through the
// converter, and then runs whatever clean-up handler is installed afterwards.
BOOL CInccKscIn::CleanUpIsoInKr()
{
    m_nESCBytes = 0;
    m_pfnCleanUp = &CInccKscIn::CleanUpMain;
    m_pfnConv = &CInccKscIn::ConvMain;

    (void)Output(ESC);
    (void)ConvertChar(ISO2022_IN_CHAR);
    (void)ConvertChar(ISO2022_IN_KR_CHAR_1);

    const BOOL fCleaned = CleanUp();
    return fCleaned;
}
// Reports how many trailing input bytes are still pending:
//   1 when a lead byte is flagged as outstanding (m_fLeadByte),
//   otherwise the number of buffered escape-sequence bytes, capped at 3,
//   otherwise 0.
int CInccKscIn::GetUnconvertBytes()
{
    if (m_fLeadByte)
        return 1;

    if (m_nESCBytes == 0)
        return 0;

    return (m_nESCBytes >= 4) ? 3 : m_nESCBytes;
}
// Packs the decoder state into a DWORD:
//   high word 0xC431 (50225, ISO-2022-KR),
//   bit 0 = Korean mode (m_fKorea), bit 1 = shifted state (m_fShift).
DWORD CInccKscIn::GetConvertMode()
{
    DWORD dwMode = 0xC4310000;   // 0xC431 -> 50225 ISO-2022-KR

    if (m_fKorea)
        dwMode |= 0x00000001;
    if (m_fShift)
        dwMode |= 0x00000002;

    return dwMode;
}
// Restores decoder state from a mode value: the state is reset first, then
// bit 0 turns Korean mode on and bit 1 turns the shifted state on. Clear bits
// leave whatever Reset() established; the remaining bits of mode are ignored.
void CInccKscIn::SetConvertMode(DWORD mode)
{
    Reset();

    if ((mode & 0x00000002) != 0)
        m_fShift = TRUE;
    if ((mode & 0x00000001) != 0)
        m_fKorea = TRUE;
}
// ============================================================================
// Internet Character Set Conversion: Output to ISO-2022-KR
// ============================================================================
/******************************************************************************
************************** C O N S T R U C T O R **************************
******************************************************************************/
// Constructs the ISO-2022-KR output converter. The code page and code set go
// to the CINetCodeConverter base; the conversion flags and the fallback
// character pointer are stored for use by ConvertChar(). The pointer is kept
// as given, not copied.
CInccKscOut::CInccKscOut(UINT uCodePage, int nCodeSet, DWORD dwFlag, WCHAR *lpFallBack)
    : CINetCodeConverter(uCodePage, nCodeSet)
{
    Reset();

    _dwFlag = dwFlag;
    _lpFallBack = lpFallBack;
}
/******************************************************************************
******************************* R E S E T *********************************
******************************************************************************/
// Puts the encoder back into its initial state: no lead byte pending, not
// shifted, and the designator not yet written.
void CInccKscOut::Reset()
{
    // No half-read double-byte character.
    m_tcLeadByte = 0;
    m_fDoubleByte = FALSE;

    // No shift-out issued and no designator emitted yet.
    m_fShift = FALSE;
    m_fKorea = FALSE;
}
/******************************************************************************
************************* C O N V E R T C H A R *************************
******************************************************************************/
// Encodes one byte of Korean multi-byte input as ISO-2022-KR.
//
// A lead byte is only remembered; output happens when its trail byte arrives.
// Single bytes are written as-is, preceded by SI if the stream is currently
// shifted. For a completed pair:
//   - trail byte <= 0x40: SI if shifted, then both bytes unchanged;
//   - lead and trail both > 0xa0 (Wansung): the designator (once) and SO as
//     needed, then both bytes with the high bit cleared;
//   - anything else: SI if shifted, then either a numeric character
//     reference (when an entitize flag is set and the pair converts to
//     Unicode) or the default character, in which case S_FALSE is returned.
//
// cchSrc is not used by this implementation.
// Returns E_FAIL when the last Output() call fails, S_FALSE when the default
// character was substituted, S_OK otherwise.
HRESULT CInccKscOut::ConvertChar(UCHAR tc, int cchSrc)
{
    HRESULT hrResult = S_OK;
    BOOL fWritten = TRUE;

    //
    // IE RAID #103403 weiwu 03/16/00
    //
    // Per Korean PM (sykim), we don't have to prepend the iso-2022-kr
    // designator to the conversion result string. Also, URLMON can't handle
    // an encoded ASCII iso-2022-kr string. The following code is therefore
    // disabled; if that triggers any compatibility issues, re-enable it.
    //
#if 0
    // put designator to the top of the document
    if (!m_fKorea) {
        (void)Output(ESC);
        (void)Output(ISO2022_IN_CHAR);
        (void)Output(ISO2022_IN_KR_CHAR_1);
        (void)Output(ISO2022_IN_KR_CHAR_2);
        m_fKorea = TRUE;
    }
#endif

    if (m_fDoubleByte) {
        // tc is the trail byte of the pair started by m_tcLeadByte.
        m_fDoubleByte = FALSE;

        if (tc <= 0x40) {
            // Trail byte does not indicate Hangeul: pass both bytes through.
            if (m_fKorea && m_fShift) {
                (void)Output(SI);
                m_fShift = FALSE;
            }
            (void)Output(m_tcLeadByte);
            fWritten = Output(tc);
        } else if (m_tcLeadByte > 0xa0 && tc > 0xa0) {
            // Wansung: make sure we are shifted out, writing the designator
            // first if it has not been written yet.
            if (!m_fShift) {
                if (!m_fKorea) {
                    (void)Output(ESC);
                    (void)Output(ISO2022_IN_CHAR);
                    (void)Output(ISO2022_IN_KR_CHAR_1);
                    (void)Output(ISO2022_IN_KR_CHAR_2);
                    m_fKorea = TRUE;
                }
                (void)Output(SO);
                m_fShift = TRUE;
            }
            (void)Output(m_tcLeadByte & 0x7f);
            fWritten = Output(tc & 0x7f);
        } else {
            // Not a Wansung pair: fall back to an entity or default char.
            UCHAR szDefaultChar[3] = {0x3f};   // possible DBCS + null

            if (_lpFallBack && (_dwFlag & MLCONVCHARF_USEDEFCHAR)) {
                // Only a single-byte fallback is accepted; otherwise use '?'.
                if (1 != WideCharToMultiByte(CP_KOR_5601, 0,
                                             (LPCWSTR)_lpFallBack, 1,
                                             (LPSTR)szDefaultChar, ARRAYSIZE(szDefaultChar),
                                             NULL, NULL)) {
                    szDefaultChar[0] = 0x3f;
                }
            }

            // Shift in if we are in DBCS mode.
            if (m_fKorea && m_fShift) {
                (void)Output(SI);
                m_fShift = FALSE;
            }

            BOOL fEntitized = FALSE;

            if (_dwFlag & (MLCONVCHARF_NCR_ENTITIZE | MLCONVCHARF_NAME_ENTITIZE)) {
                char szPair[2];
                WCHAR szwChar[2];

                szPair[0] = m_tcLeadByte;
                szPair[1] = tc;

                if (MultiByteToWideChar(CP_KOR_5601, 0, szPair, 2, szwChar, ARRAYSIZE(szwChar))) {
                    // Write the NCR entity "&#<decimal code>;".
                    char szDigits[10];

                    Output('&');
                    Output('#');
                    _ultoa((unsigned long)szwChar[0], (char*)szDigits, 10);
                    for (const char *pch = szDigits; *pch != '\0'; ++pch) {
                        Output(*pch);
                    }
                    fWritten = Output(';');
                    fEntitized = TRUE;
                }
            }

            if (!fEntitized) {
                fWritten = Output(szDefaultChar[0]);   // use default char
                hrResult = S_FALSE;
            }
        }

        m_tcLeadByte = 0;
    } else if (IS_KOR_LEADBYTE(tc)) {
        //
        // IsDBCSLeadByteEx() is not used here because of perf. concerns.
        // Ideally we would assert that the hard-coded table matches
        // IsDBCSLeadByteEx(), but MLang ships on down-level platforms and the
        // assert would not be valid if a range changes.
        //
        m_fDoubleByte = TRUE;
        m_tcLeadByte = tc;
    } else {
        // Plain single byte: leave the shifted state first if necessary.
        if (m_fKorea && m_fShift) {
            (void)Output(SI);
            m_fShift = FALSE;
        }
        fWritten = Output(tc);
    }

    return fWritten ? hrResult : E_FAIL;
}
/******************************************************************************
***************************** C L E A N U P *****************************
******************************************************************************/
// Ends the output stream: if the encoder is still shifted, writes SI and
// clears the shift flag. Returns the result of that Output() call, or TRUE
// when nothing had to be written.
BOOL CInccKscOut::CleanUp()
{
    if (!m_fShift)
        return TRUE;

    const BOOL fWritten = Output(SI);
    m_fShift = FALSE;
    return fWritten;
}
// Reports how many input bytes are still pending: 1 when a lead byte has
// been stored and its trail byte has not arrived yet, otherwise 0.
int CInccKscOut::GetUnconvertBytes()
{
    return m_tcLeadByte ? 1 : 0;
}
// Packs the encoder state into a DWORD: bit 0 = designator already written
// (m_fKorea), bit 1 = shifted state (m_fShift).
// For output the code page (0xC431 -> 50225 ISO-2022-KR) does not need to be
// written back, so the high word stays zero.
DWORD CInccKscOut::GetConvertMode()
{
    DWORD dwMode = 0;

    if (m_fKorea)
        dwMode |= 0x00000001;
    if (m_fShift)
        dwMode |= 0x00000002;

    return dwMode;
}
// Restores encoder state from a mode value: the state is reset first, then
// bit 0 sets m_fKorea and bit 1 sets m_fShift. The remaining bits of mode
// are ignored.
void CInccKscOut::SetConvertMode(DWORD mode)
{
    Reset();

    if ((mode & 0x00000002) != 0)
        m_fShift = TRUE;
    if ((mode & 0x00000001) != 0)
        m_fKorea = TRUE;
}