windows-nt/Source/XPSP1/NT/windows/winstate/cobra/utils/main/unicode.c
2020-09-26 16:20:57 +08:00

580 lines
12 KiB
C

/*++
Copyright (c) 1996 Microsoft Corporation
Module Name:
unicode.c
Abstract:
Simplified Unicode-Ansi conversion functions.
Externally exposed routines:
In-Place Conversion:
KnownSizeDbcsToUnicodeN
KnownSizeUnicodeToDbcsN
KnownSizeWtoA
KnownSizeAtoW
In-Place Conversion without nul checks:
DirectDbcsToUnicodeN
DirectUnicodeToDbcsN
DirectAtoW
DirectWtoA
Length/pool options:
DbcsToUnicodeN
UnicodeToDbcsN
DbcsToUnicode
UnicodeToDbcs
FreeConvertedPoolStr
Simplified type conversions:
ConvertWtoA
ConvertAtoW
FreeConvertedStr
TCHAR routines that can be compiled both ways:
CreateDbcs
CreateUnicode
DestroyDbcs
DestroyUnicode
Author:
Jim Schmidt (jimschm) 04-Aug-1997
Revision History:
marcw 2-Sep-1999 Moved over from Win9xUpg project.
jimschm 15-Feb-1999 Eliminated MikeCo's routines, since they are
broken on FE
jimschm 23-Sep-1998 Added in-place routines
--*/
#include "pch.h"
#include <locale.h>
#include "utilsp.h"
// Pool the conversion routines in this file fall back to when the caller
// does not supply one.
extern PMHANDLE g_TextPool;
// Flags passed as dwFlags to WideCharToMultiByte by the Unicode-to-DBCS
// routines in this file.
extern DWORD g_MigutilWCToMBFlags;
// Code page passed to WideCharToMultiByte / MultiByteToWideChar by the
// routines in this file. Starts out as CP_ACP.
WORD g_GlobalCodePage = CP_ACP;
// Function type used for the "SetCPGlobal" entry point that
// SetGlobalCodePage looks up in kernel32.dll at run time.
typedef VOID(WINAPI SETACP)(WORD CodePage);
typedef SETACP * PSETACP;
VOID
SetGlobalCodePage (
    IN      WORD CodePage,
    IN      LCID Locale
    )
/*++
Routine Description:
    SetGlobalCodePage stores CodePage as the code page used by the
    conversion routines in this file. When ISNT() is true, it also looks
    up "SetCPGlobal" in kernel32.dll and, if the entry point exists, calls
    it with CodePage. Finally it sets the thread locale, calls
    setlocale (LC_ALL, "") and calls InitLeadByteTable.
Arguments:
    CodePage - Specifies the code page to store in g_GlobalCodePage.
    Locale   - Specifies the locale passed to SetThreadLocale.
Return Value:
    none
--*/
{
    HANDLE kernel32 = NULL;
    PSETACP setCpGlobal = NULL;

    g_GlobalCodePage = CodePage;

    //
    // The library is only loaded on NT; everywhere else the handle stays
    // NULL and the lookup below is skipped.
    //
    if (ISNT()) {
        kernel32 = LoadLibrary (TEXT("kernel32.dll"));
    }

    if (kernel32) {
        setCpGlobal = (PSETACP) GetProcAddress (kernel32, "SetCPGlobal");
        if (setCpGlobal) {
            setCpGlobal (CodePage);
        }

        FreeLibrary (kernel32);
    }

    SetThreadLocale (Locale);
    setlocale (LC_ALL, "");
    InitLeadByteTable ();
}
WORD
SetConversionCodePage (
    IN      WORD CodePage
    )
/*++
Routine Description:
    SetConversionCodePage replaces the code page stored in
    g_GlobalCodePage and hands back the value it replaced. Nothing else
    is touched by this routine.
Arguments:
    CodePage - Specifies the new value for g_GlobalCodePage.
Return Value:
    The value g_GlobalCodePage held before the call.
--*/
{
    WORD previous;

    previous = g_GlobalCodePage;
    g_GlobalCodePage = CodePage;

    return previous;
}
VOID
GetGlobalCodePage (
    OUT     PWORD CodePage,             OPTIONAL
    OUT     PLCID Locale                OPTIONAL
    )
/*++
Routine Description:
    GetGlobalCodePage reports the code page stored in g_GlobalCodePage and
    the current thread locale. Either output may be NULL, in which case it
    is skipped.
Arguments:
    CodePage - Receives g_GlobalCodePage, or the result of GetACP() when
               g_GlobalCodePage is CP_ACP.
    Locale   - Receives the result of GetThreadLocale().
Return Value:
    none
--*/
{
    if (CodePage) {
        //
        // CP_ACP is a placeholder; report the real ANSI code page instead.
        //
        *CodePage = (g_GlobalCodePage != CP_ACP) ? g_GlobalCodePage : (WORD) GetACP();
    }

    if (Locale) {
        *Locale = GetThreadLocale();
    }
}
PCSTR
RealUnicodeToDbcsN (
    IN      PMHANDLE Pool,              OPTIONAL
    IN      PCWSTR StrIn,
    IN      DWORD Chars
    )
/*++
Routine Description:
    Converts a UNICODE string to a multibyte string in the code page held
    in g_GlobalCodePage. The result is allocated from a pool and is always
    nul-terminated.
Arguments:
    Pool  - Specifies the pool where memory is allocated from. If not
            specified, g_TextPool is used instead.
    StrIn - Specifies the inbound UNICODE string
    Chars - Specifies the number of characters, excluding the nul, to
            convert, or INVALID_CHAR_COUNT to convert all of StrIn.
Return Value:
    A pointer to the converted string, or NULL if the allocation or the
    conversion failed. On conversion failure the buffer is released back
    to the pool and the last error is preserved across that release.
--*/
{
    PSTR DbcsStr;
    DWORD Size;
    DWORD rc;

    if (!Pool) {
        Pool = g_TextPool;
    }

    if (INVALID_CHAR_COUNT == Chars) {
        Chars = CharCountW (StrIn);
    }

    Size = (Chars + 1) * 3;     // maximum for UTF8 encoding

    DbcsStr = (PSTR) PmGetAlignedMemory (Pool, Size);
    if (!DbcsStr) {
        DEBUGMSG ((DBG_ERROR, "UnicodeToDbcsN could not allocate string"));
        return NULL;
    }

    //
    // UTF8 conversions must not be given the usual conversion flags.
    // One byte of the buffer is held back so the terminator written below
    // always fits, whatever the code page produces.
    //
    rc = (DWORD) WideCharToMultiByte (
                    g_GlobalCodePage,
                    (g_GlobalCodePage == CP_UTF8) ? 0 : g_MigutilWCToMBFlags,
                    StrIn,
                    (INT) Chars,            // wc input count
                    DbcsStr,
                    (INT) (Size - 1),       // byte capacity, minus terminator
                    NULL,
                    NULL
                    );

    // Report error returns from WideCharToMultiByte
    if (!rc && Chars) {
        PushError();
        PmReleaseMemory (Pool, DbcsStr);
        PopError();
        DEBUGMSG ((
            DBG_WARNING,
            "UnicodeToDbcsN error caused memory to be released in pool; may cause harmless PoolMem warnings."
            ));
        return NULL;
    }

    //
    // rc is the number of bytes WideCharToMultiByte wrote, so it is the
    // terminator position for every code page. Re-walking the output by
    // character count would read a buffer that is not terminated yet.
    //
    DbcsStr[rc] = 0;

    return DbcsStr;
}
PCWSTR
RealDbcsToUnicodeN (
    IN      PMHANDLE Pool,              OPTIONAL
    IN      PCSTR StrIn,
    IN      DWORD Chars
    )
/*++
Routine Description:
    Converts a DBCS string to UNICODE, using the code page held in
    g_GlobalCodePage. The result is allocated from a pool and is always
    nul-terminated.
Arguments:
    Pool  - Specifies pool to allocate UNICODE string from. If not
            specified, g_TextPool is used.
    StrIn - Specifies string to be converted
    Chars - Specifies the number of multibyte characters, excluding the
            nul, to convert. If INVALID_CHAR_COUNT, all of StrIn will be
            converted.
Return Value:
    A pointer to the converted UNICODE string, or NULL if the allocation
    or the conversion failed. On conversion failure the buffer is released
    back to the pool and the last error is preserved across that release.
--*/
{
    PWSTR UnicodeStr;
    DWORD UnicodeStrBufLenBytes;
    DWORD WcharsConverted;
    DWORD StrInBytesToConvert;

    //
    // Find number of multi-byte characters to convert. Punt on case where
    // caller asks for more chars than available.
    //
    if (INVALID_CHAR_COUNT == Chars) {
        Chars = CharCountA (StrIn);
    }

    //
    // Count bytes to convert from the input string (excludes delimiter)
    //
    StrInBytesToConvert = (DWORD)(CharCountToPointerA (StrIn, Chars) - StrIn);

    //
    // Get output buffer size, in bytes, including delimiter
    //
    UnicodeStrBufLenBytes = (Chars + 1) * sizeof (WCHAR);

    if (!Pool) {
        Pool = g_TextPool;
    }

    //
    // Get buffer
    //
    UnicodeStr = (PWSTR) PmGetAlignedMemory (Pool, UnicodeStrBufLenBytes);
    if (!UnicodeStr) {
        DEBUGMSG ((DBG_ERROR, "DbcsToUnicodeN could not allocate string"));
        return NULL;
    }

    //
    // Convert. The output capacity is expressed in WCHARs, not bytes.
    // The buffer holds Chars + 1 WCHARs; only Chars are offered to the
    // conversion so the delimiter written below is always in bounds.
    //
    WcharsConverted = (DWORD) MultiByteToWideChar (
                                g_GlobalCodePage,
                                0,
                                StrIn,
                                (INT) StrInBytesToConvert,
                                UnicodeStr,
                                (INT) Chars
                                );

    //
    // Check for conversion error (>0 chars in, 0 chars out)
    //
    if (0 == WcharsConverted && 0 != Chars) {
        PushError();
        PmReleaseMemory (Pool, UnicodeStr);
        PopError();
        DEBUGMSG ((
            DBG_WARNING,
            "DbcsToUnicodeN error caused memory to be released in pool; may cause harmless warnings."
            ));
        return NULL;
    }

    //
    // Write delimiter on the output string
    //
    UnicodeStr[WcharsConverted] = 0;

    return UnicodeStr;
}
VOID
FreeConvertedPoolStr (
    IN      PMHANDLE Pool,              OPTIONAL
    IN      PVOID StrIn
    )
/*++
Routine Description:
    Releases a string that was returned by UnicodeToDbcsN or
    DbcsToUnicodeN back to its pool. A NULL string is ignored.
Arguments:
    Pool  - Specifies the pool the string is released to. If not
            specified, g_TextPool is used.
    StrIn - Specifies the string that was returned by UnicodeToDbcsN or
            DbcsToUnicodeN. May be NULL.
Return Value:
    none
--*/
{
    if (StrIn) {
        PmReleaseMemory (Pool ? Pool : g_TextPool, (PVOID) StrIn);
    }
}
PSTR
KnownSizeUnicodeToDbcsN (
    OUT     PSTR StrOut,
    IN      PCWSTR StrIn,
    IN      DWORD Chars
    )
/*++
Routine Description:
    KnownSizeUnicodeToDbcsN converts a UNICODE string to DBCS, using the
    code page held in g_GlobalCodePage. The caller manages the outbound
    buffer. The conversion is limited to Chars * 2 output bytes and a nul
    is written right after the converted bytes, so StrOut needs room for
    Chars * 2 + 1 bytes. If the conversion fails, StrOut receives an
    empty string (and a warning is logged in debug builds).
Arguments:
    StrOut - Receives the DBCS result.
    StrIn  - Specifies the UNICODE string to convert.
    Chars  - Specifies the character count of StrIn (not the byte count),
             or INVALID_CHAR_COUNT for the complete string.
Return Value:
    Returns StrOut.
--*/
{
    DWORD rc;

    if (INVALID_CHAR_COUNT == Chars) {
        Chars = CharCountW (StrIn);
    }

    //
    // UTF8 conversions must not be given the usual conversion flags, so
    // pass zero for that code page, the same way RealUnicodeToDbcsN does.
    //
    rc = (DWORD) WideCharToMultiByte (
                    g_GlobalCodePage,
                    (g_GlobalCodePage == CP_UTF8) ? 0 : g_MigutilWCToMBFlags,
                    StrIn,
                    (INT) Chars,            // wc input count
                    StrOut,
                    (INT) Chars * 2,
                    NULL,
                    NULL
                    );

    DEBUGMSG_IF ((
        !rc && Chars,
        DBG_WARNING,
        "KnownSizeUnicodeToDbcsN failed."
        ));

    // rc is the number of bytes written; terminate right after them
    StrOut[rc] = 0;

    return StrOut;
}
PWSTR
KnownSizeDbcsToUnicodeN (
    OUT     PWSTR StrOut,
    IN      PCSTR StrIn,
    IN      DWORD Chars
    )
/*++
Routine Description:
    KnownSizeDbcsToUnicodeN converts a DBCS string to UNICODE, using the
    code page held in g_GlobalCodePage. The caller manages the outbound
    buffer. A nul is written right after the converted characters. If the
    conversion fails, StrOut receives an empty string (and a warning is
    logged in debug builds).
Arguments:
    StrOut - Receives the UNICODE result.
    StrIn  - Specifies the DBCS string to convert.
    Chars  - Specifies the character count of StrIn (not the byte count),
             or INVALID_CHAR_COUNT for the complete string.
Return Value:
    Returns StrOut.
--*/
{
    DWORD rc;
    DWORD StrInBytesToConvert;

    if (INVALID_CHAR_COUNT == Chars) {
        StrInBytesToConvert = ByteCountA (StrIn);
    } else {
        StrInBytesToConvert = (DWORD)(CharCountToPointerA (StrIn, Chars) - StrIn);
    }

    //
    // NOTE(review): the output capacity reported to the conversion is
    // twice the input byte count, in WCHARs; confirm callers size StrOut
    // accordingly.
    //
    rc = (DWORD) MultiByteToWideChar (
                    g_GlobalCodePage,
                    0, // MB_ERR_INVALID_CHARS,
                    StrIn,
                    (INT) StrInBytesToConvert,
                    StrOut,
                    (INT) StrInBytesToConvert * 2
                    );

    //
    // Only warn when there was something to convert. Chars cannot be used
    // for this test: it is still INVALID_CHAR_COUNT (non-zero) when the
    // caller asked for the complete string, even if that string is empty.
    //
    DEBUGMSG_IF ((
        !rc && StrInBytesToConvert,
        DBG_WARNING,
        "KnownSizeDbcsToUnicodeN failed."
        ));

    // rc is the number of WCHARs written; terminate right after them
    StrOut[rc] = 0;

    return StrOut;
}
PSTR
DirectUnicodeToDbcsN (
    OUT     PSTR StrOut,
    IN      PCWSTR StrIn,
    IN      DWORD Bytes
    )
/*++
Routine Description:
    DirectUnicodeToDbcsN converts a UNICODE string to DBCS, using the code
    page held in g_GlobalCodePage. The caller manages the outbound buffer.
    This function does not check for nuls in StrIn when an explicit byte
    count is given, and it does not terminate the string. The conversion
    is limited to Bytes output bytes.
Arguments:
    StrOut - Receives the DBCS result.
    StrIn  - Specifies the UNICODE string to convert.
    Bytes  - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
             for the complete string.
Return Value:
    A pointer to the position in StrOut just past the last byte written.
    This equals StrOut when nothing was converted, including when the
    conversion failed (a warning is logged in debug builds).
--*/
{
    DWORD rc;

    if (INVALID_CHAR_COUNT == Bytes) {
        Bytes = ByteCountW (StrIn);
    }

    //
    // UTF8 conversions must not be given the usual conversion flags, so
    // pass zero for that code page, the same way RealUnicodeToDbcsN does.
    //
    rc = (DWORD) WideCharToMultiByte (
                    g_GlobalCodePage,
                    (g_GlobalCodePage == CP_UTF8) ? 0 : g_MigutilWCToMBFlags,
                    StrIn,
                    (INT) (Bytes / sizeof (WCHAR)),
                    StrOut,
                    (INT) Bytes,
                    NULL,
                    NULL
                    );

    DEBUGMSG_IF ((
        !rc && Bytes,
        DBG_WARNING,
        "DirectUnicodeToDbcsN failed."
        ));

    // rc is the number of bytes written; the caller continues from there
    return StrOut + rc;
}
PWSTR
DirectDbcsToUnicodeN (
    OUT     PWSTR StrOut,
    IN      PCSTR StrIn,
    IN      DWORD Bytes
    )
/*++
Routine Description:
    DirectDbcsToUnicodeN converts a DBCS string to UNICODE, using the code
    page held in g_GlobalCodePage. The caller manages the outbound buffer.
    No nul check is made on StrIn when an explicit byte count is given,
    and the output is not terminated.
Arguments:
    StrOut - Receives the UNICODE result.
    StrIn  - Specifies the DBCS string to convert.
    Bytes  - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
             for the complete string.
Return Value:
    A pointer to the position in StrOut just past the last WCHAR written.
    This equals StrOut when nothing was converted.
--*/
{
    DWORD inputBytes = Bytes;
    DWORD converted;

    if (inputBytes == INVALID_CHAR_COUNT) {
        inputBytes = ByteCountA (StrIn);
    }

    converted = (DWORD) MultiByteToWideChar (
                            g_GlobalCodePage,
                            0, // MB_ERR_INVALID_CHARS,
                            StrIn,
                            (INT) inputBytes,
                            StrOut,
                            (INT) inputBytes * 2
                            );

    DEBUGMSG_IF ((
        !converted && inputBytes,
        DBG_WARNING,
        "DirectDbcsToUnicodeN failed."
        ));

    // The caller continues writing from the end of the converted text
    return StrOut + converted;
}