/*++

Copyright (c) 1996 Microsoft Corporation

Module Name:

    unicode.c

Abstract:

    Simplified Unicode-Ansi conversion functions.

    Externally exposed routines:
      In-Place Conversion:
        KnownSizeDbcsToUnicodeN
        KnownSizeUnicodeToDbcsN
        KnownSizeWtoA
        KnownSizeAtoW

      In-Place Conversion without nul checks:
        DirectDbcsToUnicodeN
        DirectUnicodeToDbcsN
        DirectAtoW
        DirectWtoA

      Length/pool options:
        DbcsToUnicodeN
        UnicodeToDbcsN
        DbcsToUnicode
        UnicodeToDbcs
        FreeConvertedPoolStr

      Simplified type conversions:
        ConvertWtoA
        ConvertAtoW
        FreeConvertedStr

      TCHAR routines that can be compiled both ways:
        CreateDbcs
        CreateUnicode
        DestroyDbcs
        DestroyUnicode

Author:

    Jim Schmidt (jimschm)   04-Aug-1997

Revision History:

    marcw       2-Sep-1999  Moved over from Win9xUpg project.
    jimschm     15-Feb-1999 Eliminated MikeCo's routines, since they are
                            broken on FE
    jimschm     23-Sep-1998 Added in-place routines

--*/

#include "pch.h"
#include <locale.h>
#include "utilsp.h"

extern PMHANDLE g_TextPool;
extern DWORD g_MigutilWCToMBFlags;

//
// Code page used by every conversion routine in this module.
//

WORD g_GlobalCodePage = CP_ACP;

typedef VOID(WINAPI SETACP)(WORD CodePage);
typedef SETACP * PSETACP;


static
DWORD
pGetWCToMBFlags (
    VOID
    )

/*++

Routine Description:

  pGetWCToMBFlags selects the dwFlags value to pass to WideCharToMultiByte
  for the current conversion code page. CP_UTF8 gets zero; every other code
  page gets g_MigutilWCToMBFlags. Keeping the selection in one place makes
  all Unicode-to-DBCS routines in this module agree.

Arguments:

  none

Return Value:

  The flags for WideCharToMultiByte.

--*/

{
    if (g_GlobalCodePage == CP_UTF8) {
        return 0;
    }

    return g_MigutilWCToMBFlags;
}


VOID
SetGlobalCodePage (
    IN      WORD CodePage,
    IN      LCID Locale
    )

/*++

Routine Description:

  SetGlobalCodePage records CodePage as the conversion code page for this
  module. On NT it also looks up the SetCPGlobal export of kernel32.dll and,
  if the export is present, calls it with CodePage. Finally it sets the
  thread locale, calls setlocale (LC_ALL, "") and rebuilds the lead byte
  table.

Arguments:

  CodePage - Specifies the code page to use for subsequent conversions.
  Locale   - Specifies the locale to apply to the calling thread.

Return Value:

  none

--*/

{
    PSETACP SetACP;
    HMODULE Lib;

    g_GlobalCodePage = CodePage;

    if (ISNT()) {
        Lib = LoadLibrary (TEXT("kernel32.dll"));

        if (Lib) {
            //
            // The export is optional; silently skip it when not found.
            //

            SetACP = (PSETACP) GetProcAddress (Lib, "SetCPGlobal");
            if (SetACP) {
                SetACP (CodePage);
            }

            FreeLibrary (Lib);
        }
    }

    SetThreadLocale (Locale);
    setlocale(LC_ALL,"");
    InitLeadByteTable ();
}


WORD
SetConversionCodePage (
    IN      WORD CodePage
    )

/*++

Routine Description:

  SetConversionCodePage changes only the code page used by the conversion
  routines in this module. Unlike SetGlobalCodePage, nothing else is
  touched.

Arguments:

  CodePage - Specifies the new conversion code page.

Return Value:

  The code page that was in effect before the call, so the caller can
  restore it later.

--*/

{
    WORD oldCodePage;

    oldCodePage = g_GlobalCodePage;
    g_GlobalCodePage = CodePage;

    return oldCodePage;
}


VOID
GetGlobalCodePage (
    OUT     PWORD CodePage,             OPTIONAL
    OUT     PLCID Locale                OPTIONAL
    )

/*++

Routine Description:

  GetGlobalCodePage reports the conversion code page and the locale of the
  calling thread.

Arguments:

  CodePage - Receives the conversion code page. When the module is set to
             CP_ACP, the value returned by GetACP is reported instead.
  Locale   - Receives the result of GetThreadLocale.

Return Value:

  none

--*/

{
    if (CodePage) {
        if (g_GlobalCodePage == CP_ACP) {
            *CodePage = (WORD) GetACP();
        } else {
            *CodePage = g_GlobalCodePage;
        }
    }

    if (Locale) {
        *Locale = GetThreadLocale();
    }
}


PCSTR
RealUnicodeToDbcsN (
    IN      PMHANDLE Pool,            OPTIONAL
    IN      PCWSTR StrIn,
    IN      DWORD Chars
    )

/*++

Routine Description:

  Converts a UNICODE string to DBCS.

Arguments:

  Pool  - Specifies the pool where memory is allocated from. If not
          specified, g_TextPool is used instead.
  StrIn - Specifies the inbound UNICODE string
  Chars - Specifies the number of characters, excluding the nul, to
          convert. If INVALID_CHAR_COUNT, all of StrIn is converted.

Return Value:

  A pointer to the ANSI string, or NULL if an error occurred.

--*/

{
    PSTR DbcsStr;
    DWORD Size;
    DWORD rc;

    if (!Pool) {
        Pool = g_TextPool;
    }

    if (INVALID_CHAR_COUNT == Chars) {
        Chars = CharCountW (StrIn);
    }

    Size = (Chars + 1) * 3; // maximum for UTF8 encoding

    DbcsStr = (PSTR) PmGetAlignedMemory (Pool, Size);
    if (!DbcsStr) {
        DEBUGMSG ((DBG_ERROR, "UnicodeToDbcsN could not allocate string"));
        return NULL;
    }

    //
    // One byte of the buffer is held back for the terminator, so the write
    // to DbcsStr[rc] below always stays inside the allocation.
    //

    rc = (DWORD) WideCharToMultiByte (
                     g_GlobalCodePage,
                     pGetWCToMBFlags(),
                     StrIn,
                     (INT) Chars,           // wc input count
                     DbcsStr,
                     (INT) (Size - 1),
                     NULL,
                     NULL
                     );

    // Report error returns from WideCharToMultiByte
    if (!rc && Chars) {
        PushError();
        PmReleaseMemory (Pool, DbcsStr);
        PopError();

        DEBUGMSG ((
            DBG_WARNING,
            "UnicodeToDbcsN error caused memory to be released in pool; may cause harmless PoolMem warnings."
            ));

        return NULL;
    }

    //
    // rc is the number of bytes written. Terminate there for every code
    // page, rather than counting characters through a buffer that has not
    // been terminated yet.
    //

    DbcsStr[rc] = 0;

    return DbcsStr;
}


PCWSTR
RealDbcsToUnicodeN (
    IN      PMHANDLE Pool,            OPTIONAL
    IN      PCSTR StrIn,
    IN      DWORD Chars
    )

/*++

Routine Description:

  Converts a DBCS string to UNICODE.

Arguments:

  Pool  - Specifies pool to allocate UNICODE string from. If not specified,
          g_TextPool is used.
  StrIn - Specifies string to be converted
  Chars - Specifies the number of multibyte characters, excluding the nul,
          to convert. If -1, all of StrIn will be converted.

Return Value:

  A pointer to the converted UNICODE string, or NULL if an error occurred.

--*/

{
    PWSTR UnicodeStr;
    DWORD UnicodeStrBufLenWchars;
    DWORD WcharsConverted;
    DWORD StrInBytesToConvert;

    //
    // Find number of multi-byte characters to convert. Punt on case where
    // caller asks for more chars than available.
    //

    if (INVALID_CHAR_COUNT == Chars) {
        Chars = CharCountA (StrIn);
    }

    //
    // Count bytes to convert from the input string (excludes delimiter)
    //

    StrInBytesToConvert = (DWORD)(CharCountToPointerA(StrIn, Chars) - StrIn);

    //
    // Get output buffer size, in WCHARs, including delimiter. The size is
    // taken from the input byte count, which is presumed to be an upper
    // bound on the number of WCHARs the conversion can produce. Should that
    // not hold for some code page, the conversion fails cleanly because the
    // true capacity is passed below.
    //

    UnicodeStrBufLenWchars = StrInBytesToConvert + 1;

    if (!Pool) {
        Pool = g_TextPool;
    }

    //
    // Get buffer
    //

    UnicodeStr = (PWSTR) PmGetAlignedMemory (
                            Pool,
                            UnicodeStrBufLenWchars * sizeof (WCHAR)
                            );

    if (!UnicodeStr) {
        DEBUGMSG ((DBG_ERROR, "DbcsToUnicodeN could not allocate string"));
        return NULL;
    }

    //
    // Convert. The output limit is expressed in WCHARs (not bytes) and
    // leaves room for the delimiter written afterwards.
    //

    WcharsConverted = (DWORD) MultiByteToWideChar (
                                  g_GlobalCodePage,
                                  0,
                                  StrIn,
                                  (INT) StrInBytesToConvert,
                                  UnicodeStr,
                                  (INT) (UnicodeStrBufLenWchars - 1)
                                  );

    //
    // Check for conversion error (>0 chars in, 0 chars out)
    //

    if (0 == WcharsConverted && 0 != Chars) {
        PushError();
        PmReleaseMemory (Pool, UnicodeStr);
        PopError();

        DEBUGMSG ((
            DBG_WARNING,
            "DbcsToUnicodeN error caused memory to be released in pool; may cause harmless warnings."
            ));

        return NULL;
    }

    //
    // Write delimiter on the output string
    //

    UnicodeStr[WcharsConverted] = 0;

    return UnicodeStr;
}


VOID
FreeConvertedPoolStr (
    IN      PMHANDLE Pool,            OPTIONAL
    IN      PVOID StrIn
    )

/*++

Routine Description:

  Frees the memory allocated by UnicodeToDbcsN or DbcsToUnicodeN.

Arguments:

  Pool  - Specifies the pool the string was allocated from. If not
          specified, g_TextPool is used.
  StrIn - Specifies string that was returned by UnicodeToDbcsN or
          DbcsToUnicodeN. NULL is accepted and ignored.

Return Value:

  none

--*/

{
    if (!StrIn) {
        return;
    }

    if (!Pool) {
        Pool = g_TextPool;
    }

    PmReleaseMemory (Pool, (PVOID) StrIn);
}


PSTR
KnownSizeUnicodeToDbcsN (
    OUT     PSTR StrOut,
    IN      PCWSTR StrIn,
    IN      DWORD Chars
    )

/*++

Routine Description:

  KnownSizeUnicodeToDbcsN converts a UNICODE string to DBCS.  The caller
  manages the outbound buffer.

Arguments:

  StrOut - Receives the DBCS result. The result is nul-terminated.
  StrIn  - Specifies the UNICODE string to convert.
  Chars  - Specifies the character count of StrIn (not the byte count), or
           INVALID_CHAR_COUNT for the complete string.

Return Value:

  Returns StrOut.

--*/

{
    DWORD rc;

    if (INVALID_CHAR_COUNT == Chars) {
        Chars = CharCountW (StrIn);
    }

    //
    // NOTE(review): the output limit assumes at most two bytes per
    // character. That is presumably too small for CP_UTF8 text that needs
    // three bytes per character - confirm the buffer sizes callers provide
    // before raising it.
    //

    rc = (DWORD) WideCharToMultiByte (
                     g_GlobalCodePage,
                     pGetWCToMBFlags(),
                     StrIn,
                     (INT) Chars,           // wc input count
                     StrOut,
                     (INT) Chars * 2,
                     NULL,
                     NULL
                     );

    DEBUGMSG_IF ((
        !rc && Chars,
        DBG_WARNING,
        "KnownSizeUnicodeToDbcsN failed."
        ));

    StrOut[rc] = 0;

    return StrOut;
}


PWSTR
KnownSizeDbcsToUnicodeN (
    OUT     PWSTR StrOut,
    IN      PCSTR StrIn,
    IN      DWORD Chars
    )

/*++

Routine Description:

  KnownSizeDbcsToUnicodeN converts a DBCS string to UNICODE.  The caller
  manages the outbound buffer.

Arguments:

  StrOut - Receives the UNICODE result. The result is nul-terminated.
  StrIn  - Specifies the DBCS string to convert.
  Chars  - Specifies the character count of StrIn (not the byte count), or
           INVALID_CHAR_COUNT for the complete string.

Return Value:

  Returns StrOut.

--*/

{
    DWORD rc;
    DWORD StrInBytesToConvert;

    if (INVALID_CHAR_COUNT == Chars) {
        StrInBytesToConvert = ByteCountA (StrIn);
    } else {
        StrInBytesToConvert = (DWORD)(CharCountToPointerA (StrIn, Chars) - StrIn);
    }

    //
    // NOTE(review): the output limit is a WCHAR count, yet twice the input
    // byte count is passed. Left as is because the caller owns the buffer;
    // confirm caller buffer sizes before tightening.
    //

    rc = (DWORD) MultiByteToWideChar (
                     g_GlobalCodePage,
                     0, // MB_ERR_INVALID_CHARS,
                     StrIn,
                     (INT) StrInBytesToConvert,
                     StrOut,
                     (INT) StrInBytesToConvert * 2
                     );

    //
    // Warn only when there was input to convert. Chars is non-zero for
    // INVALID_CHAR_COUNT even when StrIn is empty, so the byte count is the
    // right thing to test.
    //

    DEBUGMSG_IF ((
        !rc && StrInBytesToConvert,
        DBG_WARNING,
        "KnownSizeDbcsToUnicodeN failed."
        ));

    StrOut[rc] = 0;

    return StrOut;
}


PSTR
DirectUnicodeToDbcsN (
    OUT     PSTR StrOut,
    IN      PCWSTR StrIn,
    IN      DWORD Bytes
    )

/*++

Routine Description:

  DirectUnicodeToDbcsN converts a UNICODE string to DBCS.  The caller
  manages the outbound buffer.  This function does not check for nuls
  in StrIn when Bytes is non-zero, and it does not terminate the
  string.

Arguments:

  StrOut - Receives the DBCS result.
  StrIn  - Specifies the UNICODE string to convert.
  Bytes  - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
           for the complete string.

Return Value:

  A pointer to the position in StrOut immediately after the converted
  bytes. This equals StrOut when nothing was converted.

--*/

{
    DWORD rc;

    if (INVALID_CHAR_COUNT == Bytes) {
        Bytes = ByteCountW (StrIn);
    }

    rc = (DWORD) WideCharToMultiByte (
                     g_GlobalCodePage,
                     pGetWCToMBFlags(),
                     StrIn,
                     (INT) (Bytes / sizeof (WCHAR)),    // wc input count
                     StrOut,
                     (INT) Bytes,
                     NULL,
                     NULL
                     );

    DEBUGMSG_IF ((
        !rc && Bytes,
        DBG_WARNING,
        "DirectUnicodeToDbcsN failed."
        ));

    return StrOut + rc;
}


PWSTR
DirectDbcsToUnicodeN (
    OUT     PWSTR StrOut,
    IN      PCSTR StrIn,
    IN      DWORD Bytes
    )

/*++

Routine Description:

  DirectDbcsToUnicodeN converts a DBCS string to UNICODE.  The caller
  manages the outbound buffer.  This function does not check for nuls
  in StrIn when Bytes is non-zero, and it does not terminate the string.

Arguments:

  StrOut - Receives the UNICODE result.
  StrIn  - Specifies the DBCS string to convert.
  Bytes  - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
           for the complete string.

Return Value:

  A pointer to the position in StrOut immediately after the converted
  characters. This equals StrOut when nothing was converted.

--*/

{
    DWORD rc;

    if (INVALID_CHAR_COUNT == Bytes) {
        Bytes = ByteCountA (StrIn);
    }

    //
    // NOTE(review): as in KnownSizeDbcsToUnicodeN, the output limit passed
    // here is twice the input byte count although it is measured in WCHARs.
    // The caller owns the buffer, so it is left unchanged.
    //

    rc = (DWORD) MultiByteToWideChar (
                     g_GlobalCodePage,
                     0, // MB_ERR_INVALID_CHARS,
                     StrIn,
                     (INT) Bytes,
                     StrOut,
                     (INT) Bytes * 2
                     );

    DEBUGMSG_IF ((
        !rc && Bytes,
        DBG_WARNING,
        "DirectDbcsToUnicodeN failed."
        ));

    return StrOut + rc;
}