#include "private.h" #include "convobj.h" #include "detcbase.h" #include "codepage.h" #include "detcjpn.h" #include "detckrn.h" CMLangConvertCharset::CMLangConvertCharset(void) { DllAddRef(); lpCharConverter = NULL ; m_dwSrcEncoding = 0 ; m_dwDetectSrcEncoding = 0 ; m_dwDstEncoding = 0 ; m_dwMode = 0 ; return ; } CMLangConvertCharset::~CMLangConvertCharset(void) { if (lpCharConverter) delete lpCharConverter ; DllRelease(); return ; } // // CMLangConvertCharset implementation // STDAPI CMLangConvertCharset::Initialize(UINT uiSrcCodePage, UINT uiDstCodePage, DWORD dwProperty) { HRESULT hr = S_OK ; DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::Initialize called.")); if ( m_dwSrcEncoding != uiSrcCodePage || m_dwDstEncoding != uiDstCodePage ) { m_dwSrcEncoding = uiSrcCodePage ; m_dwDstEncoding = uiDstCodePage ; if (lpCharConverter) delete lpCharConverter ; lpCharConverter = new CICharConverter ; if (!lpCharConverter) return E_FAIL ; hr = lpCharConverter->ConvertSetup(&m_dwSrcEncoding, m_dwDstEncoding); } m_dwMode = 0 ; m_dwProperty = dwProperty ; return hr ; } STDAPI CMLangConvertCharset::GetSourceCodePage(UINT *puiSrcCodePage) { DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::GetSourceCodePage called.")); if (puiSrcCodePage) { *puiSrcCodePage = m_dwSrcEncoding ; return S_OK ; } else return E_INVALIDARG ; } STDAPI CMLangConvertCharset::GetDestinationCodePage(UINT *puiDstCodePage) { DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::GetDestinationCodePage called.")); if (puiDstCodePage) { *puiDstCodePage = m_dwDstEncoding ; return S_OK ; } else return E_INVALIDARG ; } STDAPI CMLangConvertCharset::GetDeterminedSrcCodePage(UINT *puiCodePage) { DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::GetDeterminedSrcCodePage called.")); if (m_dwDetectSrcEncoding) { if (puiCodePage) { *puiCodePage = m_dwDetectSrcEncoding; return S_OK ; } else return E_INVALIDARG ; } else return S_FALSE ; } STDAPI CMLangConvertCharset::GetProperty(DWORD *pdwProperty) { DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::GetProperty called.")); if (pdwProperty) { *pdwProperty = m_dwProperty; return S_OK ; } else return E_INVALIDARG ; } STDAPI CMLangConvertCharset::DoConversion(BYTE *pSrcStr, UINT *pcSrcSize, BYTE *pDstStr, UINT *pcDstSize) { HRESULT hr ; DWORD dwMode = m_dwMode ; int nSrcSize = -1 ; int nDstSize = 0 ; DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::DoConversion called.")); // no converter was set up if (!lpCharConverter) return E_FAIL ; if (pcSrcSize) nSrcSize = *pcSrcSize ; if ( pSrcStr && nSrcSize == -1 ) // Get length of lpSrcStr if not given, assuming lpSrcStr is a zero terminate string. { if ( m_dwSrcEncoding == CP_UCS_2 ) nSrcSize = (lstrlenW( (WCHAR*) pSrcStr) << 1 ) ; else nSrcSize = lstrlenA( (CHAR*) pSrcStr) ; } if (pcDstSize) nDstSize = *pcDstSize ; if ( m_dwSrcEncoding == CP_JP_AUTO ) // Auto Detection for Japan { CIncdJapanese DetectJapan; DWORD dwSrcEncoding ; dwSrcEncoding = DetectJapan.DetectStringA((LPSTR)pSrcStr, nSrcSize); // if dwSrcEncoding is zero means there is an ambiguity, we don't return // the detected codepage to caller, instead we defaut its codepage internally // to SJIS if (dwSrcEncoding) { m_dwDetectSrcEncoding = m_dwSrcEncoding = dwSrcEncoding ; m_dwProperty |= MLCONVCHARF_AUTODETECT ; } else dwSrcEncoding = CP_JPN_SJ; hr = lpCharConverter->ConvertSetup(&dwSrcEncoding, m_dwDstEncoding); if ( hr != S_OK ) return hr ; } else if ( m_dwSrcEncoding == CP_KR_AUTO ) // Auto Detection for Korean { CIncdKorean DetectKorean; m_dwDetectSrcEncoding = m_dwSrcEncoding = DetectKorean.DetectStringA((LPSTR)pSrcStr, nSrcSize); hr = lpCharConverter->ConvertSetup(&m_dwSrcEncoding, m_dwDstEncoding); if ( hr != S_OK ) return hr ; m_dwProperty |= MLCONVCHARF_AUTODETECT ; } else if ( m_dwSrcEncoding == CP_AUTO ) // General Auto Detection for all code pages { int _nSrcSize = DETECTION_MAX_LEN < nSrcSize ? DETECTION_MAX_LEN : nSrcSize; INT nScores = 1; DWORD dwSrcEncoding ; DetectEncodingInfo Encoding; if ( S_OK == _DetectInputCodepage(MLDETECTCP_HTML, 1252, (char *)pSrcStr, &_nSrcSize, &Encoding, &nScores)) { m_dwDetectSrcEncoding = m_dwSrcEncoding = dwSrcEncoding = Encoding.nCodePage; m_dwProperty |= MLCONVCHARF_AUTODETECT ; } else { dwSrcEncoding = 1252; } hr = lpCharConverter->ConvertSetup(&dwSrcEncoding, m_dwDstEncoding); if ( hr != S_OK ) { return hr ; } } hr = lpCharConverter->DoCodeConvert(&dwMode, (LPCSTR) pSrcStr, &nSrcSize, (LPSTR) pDstStr, &nDstSize, m_dwProperty, NULL); // return the number of bytes processed for the source. if (pcSrcSize) *pcSrcSize = lpCharConverter->_nSrcSize ; if (pcDstSize) *pcDstSize = nDstSize; if (pDstStr) m_dwMode = dwMode ; lpCharConverter->ConvertCleanUp(); return hr ; } STDAPI CMLangConvertCharset::DoConversionToUnicode(CHAR *pSrcStr, UINT *pcSrcSize, WCHAR *pDstStr, UINT *pcDstSize) { HRESULT hr ; UINT nByteCountSize = (pcDstSize ? *pcDstSize * sizeof(WCHAR) : 0 ) ; DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::DoConversionToUnicode called.")); hr = DoConversion((BYTE*)pSrcStr,pcSrcSize,(BYTE*)pDstStr,&nByteCountSize); if (pcDstSize) *pcDstSize = nByteCountSize / sizeof(WCHAR); return hr; } STDAPI CMLangConvertCharset::DoConversionFromUnicode(WCHAR *pSrcStr, UINT *pcSrcSize, CHAR *pDstStr, UINT *pcDstSize) { HRESULT hr ; UINT nByteCountSize = (pcSrcSize ? *pcSrcSize * sizeof(WCHAR) : 0 ) ; DebugMsg(DM_TRACE, TEXT("CMLangConvertCharset::DoConversionFromUnicode called.")); hr = DoConversion((BYTE*)pSrcStr,&nByteCountSize,(BYTE*)pDstStr,pcDstSize); if (pcSrcSize) *pcSrcSize = nByteCountSize / sizeof(WCHAR); return hr ; }