283 lines
9.6 KiB
C++
283 lines
9.6 KiB
C++
#include "private.h"
|
|
#include "jisobj.h"
|
|
#include "eucjobj.h"
|
|
#include "hzgbobj.h"
|
|
#include "kscobj.h"
|
|
|
|
#include "utf8obj.h"
|
|
#include "utf7obj.h"
|
|
|
|
#include "fechrcnv.h"
|
|
|
|
#include "codepage.h"
|
|
|
|
#include "ichrcnv.h"
|
|
|
|
|
|
|
|
/*
 * KSC5601ToEUCKR
 *
 * Copies a CP_KOR_5601 byte string to lpDestStr.  Single bytes are copied
 * as-is, and so are double-byte pairs whose lead and trail bytes both lie in
 * 0xA1..0xFE (Wansung).  Any other pair starting with a lead byte in
 * 0x81..0xFE (UHC) is replaced by a decimal numeric character reference
 * ("&#NNNNN;") when one of the entitize flags is set and the pair converts to
 * Unicode, and by a single default character otherwise.
 *
 *   lpSrcStr   - source bytes
 *   lpnSrcSize - number of source bytes to process (only read here)
 *   lpDestStr  - destination buffer; NULL means "only count the output bytes"
 *   cchDest    - capacity of lpDestStr in bytes
 *   lpnSize    - optional; receives the number of bytes produced/counted
 *
 * Returns S_FALSE once a default character has been substituted, S_OK
 * otherwise.  Running out of room in lpDestStr simply stops the loop; it does
 * not change the return value.
 */
HRESULT CICharConverter::KSC5601ToEUCKR(LPCSTR lpSrcStr, LPINT lpnSrcSize, LPSTR lpDestStr, int cchDest, LPINT lpnSize)
{
    HRESULT hrResult = S_OK;
    int cbOut = 0;                      // bytes written (or counted) so far
    int iSrc = 0;                       // read position in lpSrcStr
    UCHAR rgchDefault[3] = {0x3f};      // possible DBCS + null; only [0] is ever emitted

    // A caller-supplied fallback replaces '?' only if it maps to exactly one byte.
    if (_lpFallBack && (_dwFlag & MLCONVCHARF_USEDEFCHAR))
    {
        const int cbFallback = WideCharToMultiByte(CP_KOR_5601, 0,
                                                   (LPCWSTR)_lpFallBack, 1,
                                                   (LPSTR)rgchDefault, ARRAYSIZE(rgchDefault), NULL, NULL);
        if (cbFallback != 1)
            rgchDefault[0] = 0x3f;
    }

    const int cbSrc = *lpnSrcSize;

    while (iSrc < cbSrc)
    {
        // Stop as soon as the output has overrun the buffer.
        if (lpDestStr && (cbOut > cchDest))
            break;

        const UCHAR bLead = (UCHAR)lpSrcStr[iSrc];

        // SBCS: not a lead byte, or a lead byte with nothing following it.
        if (bLead < 0x81 || bLead > 0xFE || iSrc + 1 >= cbSrc)
        {
            if (lpDestStr)
            {
                if (cbOut + 1 > cchDest)
                    break;
                *lpDestStr++ = lpSrcStr[iSrc];
            }
            cbOut++;
            iSrc++;
            continue;
        }

        const UCHAR bTrail = (UCHAR)lpSrcStr[iSrc + 1];

        // Wansung: both bytes in 0xA1..0xFE (bLead <= 0xFE is already known).
        if (bLead >= 0xA1 && bTrail >= 0xA1 && bTrail <= 0xFE)
        {
            if (lpDestStr)
            {
                if (cbOut + 2 > cchDest)
                    break;
                *lpDestStr++ = lpSrcStr[iSrc];
                *lpDestStr++ = lpSrcStr[iSrc + 1];
            }
            iSrc += 2;
            cbOut += 2;
            continue;
        }

        // UHC: try a numeric character reference first if the flags ask for it.
        BOOL fUseDefaultChar = TRUE;

        if (_dwFlag & (MLCONVCHARF_NCR_ENTITIZE|MLCONVCHARF_NAME_ENTITIZE))
        {
            WCHAR wszUnicode[2];

            if (MultiByteToWideChar(CP_KOR_5601, 0, &lpSrcStr[iSrc], 2, wszUnicode, ARRAYSIZE(wszUnicode)))
            {
                char szDigits[10] = {0};

                // Entity length = decimal digits plus '&', '#' and ';'.
                _ultoa((unsigned long)wszUnicode[0], (char*)szDigits, 10);
                const int cchEntity = lstrlenA(szDigits) + 3;

                if (lpDestStr)
                {
                    // Not enough space for the whole entity
                    if (cbOut + cchEntity > cchDest)
                        break;

                    *lpDestStr++ = '&';
                    *lpDestStr++ = '#';
                    for (const char *pchDigit = szDigits; *pchDigit; ++pchDigit)
                        *lpDestStr++ = *pchDigit;
                    *lpDestStr++ = ';';
                }
                cbOut += cchEntity;
                fUseDefaultChar = FALSE;
            }
        }

        // No entity was produced: substitute the default char (question mark
        // unless a fallback was accepted above).
        if (fUseDefaultChar)
        {
            if (lpDestStr)
            {
                if (cbOut + 1 > cchDest)
                    break;
                *lpDestStr++ = rgchDefault[0];
            }
            cbOut++;
            hrResult = S_FALSE;
        }

        iSrc += 2;
    } // End of loop

    if (lpnSize)
        *lpnSize = cbOut;

    return hrResult;
}
|
|
|
|
|
|
/******************************************************************************
|
|
****************** C O N V E R T I N E T S T R I N G ******************
|
|
******************************************************************************/
|
|
/*
 * CreateINetString
 *
 * Discards the current converter object (if any) and creates the one that
 * matches the requested direction and (uCodePage, nCodeSet) pair.  When no
 * pair matches, _hcins is left NULL.  When a converter is created, nCodeSet is
 * recorded in _hcins_dst.
 *
 *   fInbound  - non-zero selects the "In" converter classes, zero the "Out" ones
 *   uCodePage - code page half of the pair
 *   nCodeSet  - code set half of the pair
 *
 * Always returns S_OK, whether or not a converter was created.
 */
HRESULT CICharConverter::CreateINetString(BOOL fInbound, UINT uCodePage, int nCodeSet)
{
    // Deleting a NULL pointer is a no-op, so no guard is needed.
    delete _hcins;
    _hcins = NULL;

    // Classify the requested pair once; the same table serves both directions.
    const BOOL fJis  = (uCodePage == CP_JPN_SJ) &&
                       (nCodeSet == CP_ISO_2022_JP ||
                        nCodeSet == CP_ISO_2022_JP_ESC ||
                        nCodeSet == CP_ISO_2022_JP_SIO);
    const BOOL fEucJ = (uCodePage == CP_JPN_SJ)   && (nCodeSet == CP_EUC_JP);
    const BOOL fHzGb = (uCodePage == CP_CHN_GB)   && (nCodeSet == CP_CHN_HZ);
    const BOOL fKsc  = (uCodePage == CP_KOR_5601) && (nCodeSet == CP_ISO_2022_KR);
    const BOOL fUtf8 = (uCodePage == CP_UCS_2)    && (nCodeSet == CP_UTF_8);
    const BOOL fUtf7 = (uCodePage == CP_UCS_2)    && (nCodeSet == CP_UTF_7);

    if (fInbound)
    {
        // Inbound
        if (fJis)
            _hcins = new CInccJisIn(uCodePage, nCodeSet);       // JIS
        else if (fEucJ)
            _hcins = new CInccEucJIn(uCodePage, nCodeSet);      // EUC
        else if (fHzGb)
            _hcins = new CInccHzGbIn(uCodePage, nCodeSet);      // HZ-GB
        else if (fKsc)
            _hcins = new CInccKscIn(uCodePage, nCodeSet);
        else if (fUtf8)
            _hcins = new CInccUTF8In(uCodePage, nCodeSet);
        else if (fUtf7)
            _hcins = new CInccUTF7In(uCodePage, nCodeSet);
    }
    else
    {
        // Outbound: the JIS/EUC/HZ-GB/KSC converters also receive the flags
        // and the fallback pointer.
        if (fJis)
            _hcins = new CInccJisOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);    // JIS
        else if (fEucJ)
            _hcins = new CInccEucJOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);   // EUC
        else if (fHzGb)
            _hcins = new CInccHzGbOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);   // HZ-GB
        else if (fKsc)
            _hcins = new CInccKscOut(uCodePage, nCodeSet, _dwFlag, _lpFallBack);
        else if (fUtf8)
            _hcins = new CInccUTF8Out(uCodePage, nCodeSet);
        else if (fUtf7)
            _hcins = new CInccUTF7Out(uCodePage, nCodeSet);
    }

    // Record the destination code set of the converter just created.
    if (_hcins)
        _hcins_dst = nCodeSet;

    return S_OK;
}
|
|
|
|
/*
 * DoConvertINetString
 *
 * Converts (or, when lpDestStr is NULL or cchDest is 0, only sizes) a byte
 * string between a code page and an Internet code set.
 *
 *   lpdwMode   - optional in/out conversion mode (SO/SI, ESC, ...).  Restored
 *                into the converter before the call and written back only when
 *                a destination buffer was supplied.
 *   fInbound   - non-zero for inbound conversion, zero for outbound
 *   uCodePage  - code page; 0 selects g_uACP
 *   nCodeSet   - Internet code set
 *   lpSrcStr   - source bytes
 *   lpnSrcSize - optional in/out source length in bytes.  NULL or a negative
 *                value means lpSrcStr is zero-terminated (the terminator is
 *                included in the length).  On return from the converter path
 *                it is reduced by the number of unconverted bytes, if any.
 *   lpDestStr  - destination buffer, or NULL to query the required size
 *   cchDest    - capacity of lpDestStr in bytes
 *   lpnSize    - optional; receives the number of bytes produced or required
 *
 * Returns the HRESULT of the underlying conversion.
 */
HRESULT CICharConverter::DoConvertINetString(LPDWORD lpdwMode, BOOL fInbound, UINT uCodePage, int nCodeSet,
      LPCSTR lpSrcStr, LPINT lpnSrcSize, LPSTR lpDestStr, int cchDest, LPINT lpnSize)
{
    HRESULT hr = S_OK;
    int nSize = 0;

    // lpnSrcSize is treated as optional further down, so it must not be
    // dereferenced blindly here; a missing length means "not given".
    int cchSrc = lpnSrcSize ? *lpnSrcSize : -1;

    if (!lpnSize)
        lpnSize = &nSize;

    if (!uCodePage) // Get default code page if nothing specified
        uCodePage = g_uACP;

    // Get the length of lpSrcStr if it was not given, assuming lpSrcStr is a
    // zero-terminated string.  A NULL source has no bytes to convert.
    if (cchSrc < 0)
        cchSrc = lpSrcStr ? lstrlenA(lpSrcStr) + 1 : 0;

    // NOTE(review): the cached converter is keyed on nCodeSet only; a converter
    // created for the other direction or another code page with the same
    // nCodeSet would be reused here - confirm callers never mix these.
    if (!_hcins || (nCodeSet != _hcins_dst))
        CreateINetString(fInbound, uCodePage, nCodeSet);

    if (_hcins)
    {
        // Context created, it means DBCS
        int nTempSize = 0;

        // Restore previous mode SO/SI ESC etc.  Without a mode pointer there
        // is nothing to restore, so the converter keeps its current mode.
        if (lpdwMode)
            ((CINetCodeConverter*)_hcins)->SetConvertMode(*lpdwMode);

        // If it is a JIS output, set Kana mode
        if (!fInbound && uCodePage == CP_JPN_SJ &&
            (nCodeSet == CP_ISO_2022_JP ||
             nCodeSet == CP_ISO_2022_JP_ESC ||
             nCodeSet == CP_ISO_2022_JP_SIO))
        {
            ((CInccJisOut*)_hcins)->SetKanaMode(nCodeSet);
        }

        if (!lpDestStr || !cchDest) // Get the converted size
        {
            hr = ((CINetCodeConverter*)_hcins)->GetStringSizeA(lpSrcStr, cchSrc, lpnSize);
            if (0 == fInbound)
            {
                // Outbound: a second call with no input contributes nTempSize
                // extra bytes (presumably the converter's closing sequence -
                // confirm against CINetCodeConverter).
                HRESULT _hr = ((CINetCodeConverter*)_hcins)->GetStringSizeA(NULL, 0, &nTempSize);
                if (S_OK != _hr)
                    hr = _hr;
            }
        }
        else // Perform actual converting
        {
            hr = ((CINetCodeConverter*)_hcins)->ConvertStringA(lpSrcStr, cchSrc, lpDestStr, cchDest, lpnSize);
            if (0 == fInbound)
            {
                // Outbound: same extra no-input call, appended after the
                // bytes already written.
                HRESULT _hr = ((CINetCodeConverter*)_hcins)->ConvertStringA(NULL, 0, lpDestStr+*lpnSize, cchDest-*lpnSize, &nTempSize);
                if (S_OK != _hr)
                    hr = _hr;
            }
        }

        *lpnSize += nTempSize;

        // Report how many source bytes were consumed when some were left
        // unconverted.
        if (lpnSrcSize && ((CINetCodeConverter*)_hcins)->GetUnconvertBytes())
            *lpnSrcSize = cchSrc - ((CINetCodeConverter*)_hcins)->GetUnconvertBytes();

        // Only save the current mode SO/SI ESC if we performed an actual
        // conversion.  A two-stage conversion inquires the size first and then
        // converts (IWUU or UUWI); the size query must not disturb the mode.
        if (lpDestStr && lpdwMode)
            *lpdwMode = ((CINetCodeConverter*)_hcins)->GetConvertMode();
    }
    else
    {
        // Internet encodings that have the same encoding scheme as their
        // family encodings
        switch (nCodeSet)
        {
            case CP_EUC_KR:
                // Pass the normalised length so that a zero-terminated source
                // (negative or missing length) is handled here as well.
                hr = KSC5601ToEUCKR(lpSrcStr, &cchSrc, lpDestStr, cchDest, lpnSize);
                break;

            default:
                if (!lpDestStr || !cchDest) // Get the converted size
                    *lpnSize = cchSrc;
                else
                {
                    // Straight copy, truncated to the destination capacity.
                    *lpnSize = min(cchSrc, cchDest);
                    if (*lpnSize)
                        MoveMemory(lpDestStr, lpSrcStr, *lpnSize);
                }
        }
    }

    return hr;
}
|
|
|