580 lines
12 KiB
C
580 lines
12 KiB
C
|
/*++

Copyright (c) 1996 Microsoft Corporation

Module Name:

    unicode.c

Abstract:

    Simplified Unicode-Ansi conversion functions.

    Externally exposed routines:
      In-Place Conversion:
        KnownSizeDbcsToUnicodeN
        KnownSizeUnicodeToDbcsN
        KnownSizeWtoA
        KnownSizeAtoW

      In-Place Conversion without nul checks:
        DirectDbcsToUnicodeN
        DirectUnicodeToDbcsN
        DirectAtoW
        DirectWtoA

      Length/pool options:
        DbcsToUnicodeN
        UnicodeToDbcsN
        DbcsToUnicode
        UnicodeToDbcs
        FreeConvertedPoolStr

      Simplified type conversions:
        ConvertWtoA
        ConvertAtoW
        FreeConvertedStr

      TCHAR routines that can be compiled both ways:
        CreateDbcs
        CreateUnicode
        DestroyDbcs
        DestroyUnicode

Author:

    Jim Schmidt (jimschm)   04-Aug-1997

Revision History:

    marcw       2-Sep-1999  Moved over from Win9xUpg project.
    jimschm     15-Feb-1999 Eliminated MikeCo's routines, since they are
                            broken on FE
    jimschm     23-Sep-1998 Added in-place routines

--*/
|
||
|
|
||
|
#include "pch.h"
|
||
|
#include <locale.h>
|
||
|
#include "utilsp.h"
|
||
|
|
||
|
extern PMHANDLE g_TextPool;
|
||
|
extern DWORD g_MigutilWCToMBFlags;
|
||
|
|
||
|
WORD g_GlobalCodePage = CP_ACP;
|
||
|
|
||
|
typedef VOID(WINAPI SETACP)(WORD CodePage);
|
||
|
typedef SETACP * PSETACP;
|
||
|
|
||
|
VOID
|
||
|
SetGlobalCodePage (
|
||
|
IN WORD CodePage,
|
||
|
IN LCID Locale
|
||
|
)
|
||
|
{
|
||
|
PSETACP SetACP;
|
||
|
HANDLE Lib;
|
||
|
|
||
|
g_GlobalCodePage = CodePage;
|
||
|
|
||
|
|
||
|
if (ISNT()) {
|
||
|
Lib = LoadLibrary (TEXT("kernel32.dll"));
|
||
|
if (Lib) {
|
||
|
SetACP = (PSETACP) GetProcAddress (Lib, "SetCPGlobal");
|
||
|
if (SetACP) {
|
||
|
SetACP (CodePage);
|
||
|
}
|
||
|
|
||
|
FreeLibrary (Lib);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
SetThreadLocale (Locale);
|
||
|
setlocale(LC_ALL,"");
|
||
|
InitLeadByteTable ();
|
||
|
}
|
||
|
|
||
|
WORD
|
||
|
SetConversionCodePage (
|
||
|
IN WORD CodePage
|
||
|
)
|
||
|
{
|
||
|
WORD oldCodePage = g_GlobalCodePage;
|
||
|
g_GlobalCodePage = CodePage;
|
||
|
return oldCodePage;
|
||
|
}
|
||
|
|
||
|
VOID
|
||
|
GetGlobalCodePage (
|
||
|
OUT PWORD CodePage, OPTIONAL
|
||
|
OUT PLCID Locale OPTIONAL
|
||
|
)
|
||
|
{
|
||
|
if (CodePage) {
|
||
|
if (g_GlobalCodePage == CP_ACP) {
|
||
|
*CodePage = (WORD) GetACP();
|
||
|
} else {
|
||
|
*CodePage = g_GlobalCodePage;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (Locale) {
|
||
|
*Locale = GetThreadLocale();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
PCSTR
|
||
|
RealUnicodeToDbcsN (
|
||
|
IN PMHANDLE Pool, OPTIONAL
|
||
|
IN PCWSTR StrIn,
|
||
|
IN DWORD Chars
|
||
|
)
|
||
|
|
||
|
/*++
|
||
|
|
||
|
Routine Description:
|
||
|
|
||
|
Converts a UNICODE string to DBCS.
|
||
|
|
||
|
Arguments:
|
||
|
|
||
|
Pool - Specifies the pool where memory is allocated from. If not specified,
|
||
|
g_TextPool is used instead.
|
||
|
|
||
|
StrIn - Specifies the inbound UNICODE string
|
||
|
|
||
|
Chars - Specifies the number of characters, excluding the nul, to
|
||
|
convert.
|
||
|
|
||
|
Return Value:
|
||
|
|
||
|
A pointer to the ANSI string, or NULL if an error occurred.
|
||
|
|
||
|
--*/
|
||
|
|
||
|
{
|
||
|
PSTR DbcsStr;
|
||
|
DWORD Size;
|
||
|
DWORD rc;
|
||
|
|
||
|
if (!Pool) {
|
||
|
Pool = g_TextPool;
|
||
|
}
|
||
|
|
||
|
if (INVALID_CHAR_COUNT == Chars) {
|
||
|
Chars = CharCountW (StrIn);
|
||
|
}
|
||
|
|
||
|
Size = (Chars + 1) * 3; // maximum for UTF8 encoding
|
||
|
|
||
|
DbcsStr = (PSTR) PmGetAlignedMemory (Pool, Size);
|
||
|
if (!DbcsStr) {
|
||
|
DEBUGMSG ((DBG_ERROR, "UnicodeToDbcsN could not allocate string"));
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
rc = (DWORD) WideCharToMultiByte (
|
||
|
g_GlobalCodePage,
|
||
|
(g_GlobalCodePage == CP_UTF8)?0:g_MigutilWCToMBFlags,
|
||
|
StrIn,
|
||
|
(INT) Chars, // wc input count
|
||
|
DbcsStr,
|
||
|
(INT) Size,
|
||
|
NULL,
|
||
|
NULL
|
||
|
);
|
||
|
|
||
|
// Report error returns from WideCharToMultiByte
|
||
|
if (!rc && Chars) {
|
||
|
PushError();
|
||
|
PmReleaseMemory (Pool, DbcsStr);
|
||
|
PopError();
|
||
|
|
||
|
DEBUGMSG ((
|
||
|
DBG_WARNING,
|
||
|
"UnicodeToDbcsN error caused memory to be released in pool; may cause harmless PoolMem warnings."
|
||
|
));
|
||
|
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
if (g_GlobalCodePage == CP_UTF8) {
|
||
|
DbcsStr [rc] = 0;
|
||
|
} else {
|
||
|
*CharCountToPointerA (DbcsStr, Chars) = 0;
|
||
|
}
|
||
|
|
||
|
return DbcsStr;
|
||
|
}
|
||
|
|
||
|
|
||
|
PCWSTR
|
||
|
RealDbcsToUnicodeN (
|
||
|
IN PMHANDLE Pool, OPTIONAL
|
||
|
IN PCSTR StrIn,
|
||
|
IN DWORD Chars
|
||
|
)
|
||
|
|
||
|
/*++
|
||
|
|
||
|
Routine Description:
|
||
|
|
||
|
Converts a DBCS string to UNICODE.
|
||
|
|
||
|
Arguments:
|
||
|
|
||
|
Pool - Specifies pool to allocate UNICODE string from. If not specified,
|
||
|
g_TextPool is used.
|
||
|
|
||
|
StrIn - Specifies string to be converted
|
||
|
|
||
|
Chars - Specifies the number of multibyte characters, excluding the nul,
|
||
|
to convert. If -1, all of StrIn will be converted.
|
||
|
|
||
|
Return Value:
|
||
|
|
||
|
A pointer to the converted UNICODE string, or NULL if an error ocurred.
|
||
|
|
||
|
--*/
|
||
|
|
||
|
{
|
||
|
PWSTR UnicodeStr;
|
||
|
DWORD UnicodeStrBufLenBytes;
|
||
|
DWORD WcharsConverted;
|
||
|
DWORD StrInBytesToConvert;
|
||
|
|
||
|
//
|
||
|
// Find number of multi-byte characters to convert. Punt on case where
|
||
|
// caller asks for more chars than available.
|
||
|
//
|
||
|
if (INVALID_CHAR_COUNT == Chars) {
|
||
|
Chars = CharCountA (StrIn);
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Count bytes to convert from the input string (excludes delimiter)
|
||
|
//
|
||
|
StrInBytesToConvert = (DWORD)(CharCountToPointerA(StrIn, Chars) - StrIn);
|
||
|
|
||
|
//
|
||
|
// Get output buffer size, in bytes, including delimiter
|
||
|
//
|
||
|
UnicodeStrBufLenBytes = (Chars + 1) * sizeof (WCHAR);
|
||
|
|
||
|
if (!Pool) {
|
||
|
Pool = g_TextPool;
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Get buffer
|
||
|
//
|
||
|
UnicodeStr = (PWSTR) PmGetAlignedMemory (Pool, UnicodeStrBufLenBytes);
|
||
|
if (!UnicodeStr) {
|
||
|
DEBUGMSG ((DBG_ERROR, "DbcsToUnicodeN could not allocate string"));
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Convert
|
||
|
//
|
||
|
WcharsConverted = (DWORD) MultiByteToWideChar (
|
||
|
g_GlobalCodePage,
|
||
|
0,
|
||
|
StrIn,
|
||
|
(INT) StrInBytesToConvert,
|
||
|
UnicodeStr,
|
||
|
(INT) UnicodeStrBufLenBytes
|
||
|
);
|
||
|
|
||
|
//
|
||
|
// Check for conversion error (>0 chars in, 0 chars out)
|
||
|
//
|
||
|
if (0 == WcharsConverted && 0 != Chars) {
|
||
|
PushError();
|
||
|
PmReleaseMemory (Pool, UnicodeStr);
|
||
|
PopError();
|
||
|
|
||
|
DEBUGMSG ((
|
||
|
DBG_WARNING,
|
||
|
"DbcsToUnicodeN error caused memory to be released in pool; may cause harmless warnings."
|
||
|
));
|
||
|
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Write delimiter on the output string
|
||
|
//
|
||
|
UnicodeStr[WcharsConverted] = 0;
|
||
|
|
||
|
return UnicodeStr;
|
||
|
}
|
||
|
|
||
|
|
||
|
VOID
|
||
|
FreeConvertedPoolStr (
|
||
|
IN PMHANDLE Pool, OPTIONAL
|
||
|
IN PVOID StrIn
|
||
|
)
|
||
|
|
||
|
/*++
|
||
|
|
||
|
Routine Description:
|
||
|
|
||
|
Frees the memory allocated by UnicodeToDbcsN or DbcsToUnicodeN.
|
||
|
|
||
|
Arguments:
|
||
|
|
||
|
Pool - Specifies pool to allocate UNICODE string from. If not specified,
|
||
|
g_TextPool is used.
|
||
|
|
||
|
StrIn - Specifies string that was returned by UnicodeToDebcsN or
|
||
|
DbcsToUnicodeN.
|
||
|
|
||
|
Return Value:
|
||
|
|
||
|
none
|
||
|
|
||
|
--*/
|
||
|
|
||
|
{
|
||
|
if (!StrIn) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (!Pool) {
|
||
|
Pool = g_TextPool;
|
||
|
}
|
||
|
|
||
|
PmReleaseMemory (Pool, (PVOID) StrIn);
|
||
|
}
|
||
|
|
||
|
|
||
|
PSTR
|
||
|
KnownSizeUnicodeToDbcsN (
|
||
|
OUT PSTR StrOut,
|
||
|
IN PCWSTR StrIn,
|
||
|
IN DWORD Chars
|
||
|
)
|
||
|
|
||
|
/*++
|
||
|
|
||
|
Routine Description:
|
||
|
|
||
|
KnownSizeUnicodeToDbcsN converts a UNICODE string to DBCS. The caller
|
||
|
manages the outbound buffer.
|
||
|
|
||
|
Arguments:
|
||
|
|
||
|
StrOut - Receives the DBCS result.
|
||
|
StrIn - Specifies the UNICODE string to convert.
|
||
|
Chars - Specifies the character count of StrIn (not the byte count), or
|
||
|
INVALID_CHAR_COUNT for the complete string.
|
||
|
|
||
|
Return Value:
|
||
|
|
||
|
Returns StrOut.
|
||
|
|
||
|
--*/
|
||
|
|
||
|
{
|
||
|
DWORD rc;
|
||
|
|
||
|
if (INVALID_CHAR_COUNT == Chars) {
|
||
|
Chars = CharCountW (StrIn);
|
||
|
}
|
||
|
|
||
|
rc = (DWORD) WideCharToMultiByte (
|
||
|
g_GlobalCodePage,
|
||
|
g_MigutilWCToMBFlags,
|
||
|
StrIn,
|
||
|
(INT) Chars, // wc input count
|
||
|
StrOut,
|
||
|
(INT) Chars * 2,
|
||
|
NULL,
|
||
|
NULL
|
||
|
);
|
||
|
|
||
|
DEBUGMSG_IF ((
|
||
|
!rc && Chars,
|
||
|
DBG_WARNING,
|
||
|
"KnownSizeUnicodeToDbcsN failed."
|
||
|
));
|
||
|
|
||
|
StrOut[rc] = 0;
|
||
|
|
||
|
return StrOut;
|
||
|
}
|
||
|
|
||
|
|
||
|
PWSTR
|
||
|
KnownSizeDbcsToUnicodeN (
|
||
|
OUT PWSTR StrOut,
|
||
|
IN PCSTR StrIn,
|
||
|
IN DWORD Chars
|
||
|
)
|
||
|
|
||
|
/*++
|
||
|
|
||
|
Routine Description:
|
||
|
|
||
|
KnownSizeDbcsToUnicodeN converts a DBCS string to UNICODE. The caller
|
||
|
manages the outbound buffer.
|
||
|
|
||
|
Arguments:
|
||
|
|
||
|
StrOut - Receives the UNICODE result.
|
||
|
StrIn - Specifies the DBCS string to convert.
|
||
|
Chars - Specifies the character count of StrIn (not the byte count), or
|
||
|
INVALID_CHAR_COUNT for the complete string.
|
||
|
|
||
|
Return Value:
|
||
|
|
||
|
Returns StrOut.
|
||
|
|
||
|
--*/
|
||
|
|
||
|
{
|
||
|
DWORD rc;
|
||
|
DWORD StrInBytesToConvert;
|
||
|
|
||
|
if (INVALID_CHAR_COUNT == Chars) {
|
||
|
StrInBytesToConvert = ByteCountA (StrIn);
|
||
|
} else {
|
||
|
StrInBytesToConvert = (DWORD)(CharCountToPointerA (StrIn, Chars) - StrIn);
|
||
|
}
|
||
|
|
||
|
rc = (DWORD) MultiByteToWideChar (
|
||
|
g_GlobalCodePage,
|
||
|
0, // MB_ERR_INVALID_CHARS,
|
||
|
StrIn,
|
||
|
(INT) StrInBytesToConvert,
|
||
|
StrOut,
|
||
|
(INT) StrInBytesToConvert * 2
|
||
|
);
|
||
|
|
||
|
DEBUGMSG_IF ((
|
||
|
!rc && Chars,
|
||
|
DBG_WARNING,
|
||
|
"KnownSizeDbcsToUnicodeN failed."
|
||
|
));
|
||
|
|
||
|
StrOut[rc] = 0;
|
||
|
|
||
|
return StrOut;
|
||
|
}
|
||
|
|
||
|
|
||
|
PSTR
|
||
|
DirectUnicodeToDbcsN (
|
||
|
OUT PSTR StrOut,
|
||
|
IN PCWSTR StrIn,
|
||
|
IN DWORD Bytes
|
||
|
)
|
||
|
|
||
|
/*++
|
||
|
|
||
|
Routine Description:
|
||
|
|
||
|
DirectUnicodeToDbcsN converts a UNICODE string to DBCS. The caller
|
||
|
manages the outbound buffer. This function does not check for nuls
|
||
|
in StrIn when Bytes is non-zero, and it does not terminate the
|
||
|
string.
|
||
|
|
||
|
Arguments:
|
||
|
|
||
|
StrOut - Receives the DBCS result.
|
||
|
StrIn - Specifies the UNICODE string to convert.
|
||
|
Bytes - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
|
||
|
for the complete string.
|
||
|
|
||
|
Return Value:
|
||
|
|
||
|
Returns StrOut.
|
||
|
|
||
|
--*/
|
||
|
|
||
|
{
|
||
|
DWORD rc;
|
||
|
|
||
|
if (INVALID_CHAR_COUNT == Bytes) {
|
||
|
Bytes = ByteCountW (StrIn);
|
||
|
}
|
||
|
|
||
|
rc = (DWORD) WideCharToMultiByte (
|
||
|
g_GlobalCodePage,
|
||
|
g_MigutilWCToMBFlags,
|
||
|
StrIn,
|
||
|
(INT) (Bytes / sizeof (WCHAR)),
|
||
|
StrOut,
|
||
|
(INT) Bytes,
|
||
|
NULL,
|
||
|
NULL
|
||
|
);
|
||
|
|
||
|
DEBUGMSG_IF ((
|
||
|
!rc && Bytes,
|
||
|
DBG_WARNING,
|
||
|
"DirectUnicodeToDbcsN failed."
|
||
|
));
|
||
|
|
||
|
return StrOut + rc;
|
||
|
}
|
||
|
|
||
|
|
||
|
PWSTR
|
||
|
DirectDbcsToUnicodeN (
|
||
|
OUT PWSTR StrOut,
|
||
|
IN PCSTR StrIn,
|
||
|
IN DWORD Bytes
|
||
|
)
|
||
|
|
||
|
/*++
|
||
|
|
||
|
Routine Description:
|
||
|
|
||
|
DirectDbcsToUnicodeN converts a DBCS string to UNICODE. The caller
|
||
|
manages the outbound buffer. This function does not check for nuls
|
||
|
in StrIn when Bytes is non-zero, and it does not terminate the string.
|
||
|
|
||
|
Arguments:
|
||
|
|
||
|
StrOut - Receives the UNICODE result.
|
||
|
StrIn - Specifies the DBCS string to convert.
|
||
|
Bytes - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
|
||
|
for the complete string.
|
||
|
|
||
|
Return Value:
|
||
|
|
||
|
Returns StrOut.
|
||
|
|
||
|
--*/
|
||
|
|
||
|
{
|
||
|
DWORD rc;
|
||
|
|
||
|
if (INVALID_CHAR_COUNT == Bytes) {
|
||
|
Bytes = ByteCountA (StrIn);
|
||
|
}
|
||
|
|
||
|
rc = (DWORD) MultiByteToWideChar (
|
||
|
g_GlobalCodePage,
|
||
|
0, // MB_ERR_INVALID_CHARS,
|
||
|
StrIn,
|
||
|
(INT) Bytes,
|
||
|
StrOut,
|
||
|
(INT) Bytes * 2
|
||
|
);
|
||
|
|
||
|
DEBUGMSG_IF ((
|
||
|
!rc && Bytes,
|
||
|
DBG_WARNING,
|
||
|
"DirectDbcsToUnicodeN failed."
|
||
|
));
|
||
|
|
||
|
return StrOut + rc;
|
||
|
}
|