262 lines
8.2 KiB
C
262 lines
8.2 KiB
C
|
/****************************** Module Header ******************************\
|
||
|
* Module Name: nlsxlat.c
|
||
|
*
|
||
|
* Copyright (c) 1985-91, Microsoft Corporation
|
||
|
*
|
||
|
* This module contains the private routines for character translation:
|
||
|
* 8-bit <=> Unicode.
|
||
|
*
|
||
|
* History:
|
||
|
* 03-Jan-1992 gregoryw
|
||
|
\***************************************************************************/
|
||
|
|
||
|
#include <nt.h>
|
||
|
#include <ntrtl.h>
|
||
|
|
||
|
/*
|
||
|
* External declarations - these are temporary tables
|
||
|
*/
|
||
|
/*
 * Temporary Unicode -> ANSI translation table.  Only declared here (the
 * definition is not in this file); it is the initial value of
 * NlsUnicodeToAnsiData.
 */
extern USHORT TmpUnicodeToAnsiTable[];
|
||
|
/*
 * Temporary ANSI -> Unicode translation table.  Only declared here (the
 * definition is not in this file); it is the initial value of
 * NlsAnsiToUnicodeData.
 */
extern WCHAR TmpAnsiToUnicodeTable[];
|
||
|
#ifdef DBCS
|
||
|
/*
 * Converts one multibyte character code to a WCHAR (DBCS builds only).
 * xxxRtlMultiByteToUnicodeN passes either a single byte value or a two-byte
 * value with the lead byte in the high 8 bits and the trail byte in the low
 * 8 bits.  NOTE(review): presumably Shift-JIS -> Unicode judging by the
 * name - confirm against the definition, which is not in this file.
 */
extern WCHAR sjtouni( USHORT );
|
||
|
/*
 * Nonzero when the low 8 bits of w fall in 0x81-0x9f or 0xe0-0xfc, the byte
 * ranges this file treats as DBCS lead bytes.  The argument is fully
 * parenthesized so that a compound expression is cast as a whole rather
 * than only its first operand.  w may be evaluated more than once, so it
 * must not have side effects.
 */
#define IsDBCSFirst(w) \
    ((((unsigned char)(w)) >= 0x81 && ((unsigned char)(w)) <= 0x9f) || \
     (((unsigned char)(w)) >= 0xe0 && ((unsigned char)(w)) <= 0xfc))
|
||
|
#endif // DBCS
|
||
|
|
||
|
/*
|
||
|
* Various defines for data access
|
||
|
*/
|
||
|
/*
 * Number of entries in NlsLeadByteInfo, and the size (in entries) of each
 * per-lead-byte block inside a table of NlsMbCodePageTables.
 */
#define DBCS_TABLE_SIZE 256
|
||
|
|
||
|
/* Bits 0-3 of the low byte of b, as a UCHAR. */
#define LONIBBLE(b)     ((UCHAR)(((UCHAR)(b)) & 0x0F))
|
||
|
/* Bits 4-7 of the low byte of b, shifted down to 0-15, as a UCHAR. */
#define HINIBBLE(b)     ((UCHAR)((((UCHAR)(b)) >> 4) & 0x0F))
|
||
|
|
||
|
/* Low-order 8 bits of w, as a UCHAR. */
#define LOBYTE(w)       ((UCHAR)((USHORT)(w) & 0xFF))
|
||
|
/*
 * Bits 8-15 of w, as a UCHAR.  After the USHORT cast the shifted value
 * already fits in 8 bits, so no further masking is required.
 */
#define HIBYTE(w)       ((UCHAR)((USHORT)(w) >> 8))
|
||
|
|
||
|
/*
|
||
|
* Global data used by the translation routines.
|
||
|
*
|
||
|
*/
|
||
|
//
// Lead byte info. for the ACP, indexed by source byte value.  A nonzero
// entry marks the byte as a lead byte: xxxRtlMultiByteToUnicodeN uses the
// entry's high nibble to pick a table from NlsMbCodePageTables and its low
// nibble (times DBCS_TABLE_SIZE) as the offset of the trail byte block
// within that table.
//
UCHAR NlsLeadByteInfo[DBCS_TABLE_SIZE]; // Lead byte info. for ACP
|
||
|
//
// Array of multibyte -> Unicode translation tables, selected by the high
// nibble of a NlsLeadByteInfo entry.  It has no initializer in this file;
// presumably it is set up elsewhere before the multibyte code page path is
// used - confirm against the initialization code.
//
PUSHORT *NlsMbCodePageTables; // Multibyte to Unicode translation tables
|
||
|
//
// Single byte ANSI -> Unicode table, indexed by the source byte.  The
// translation routine only consults it for bytes with the high bit set;
// it initially points at the temporary table TmpAnsiToUnicodeTable.
//
PUSHORT NlsAnsiToUnicodeData = TmpAnsiToUnicodeTable; // Ansi CP to Unicode translation table
|
||
|
//
// Unicode -> ANSI table, walked in three steps by xxxRtlUnicodeToMultiByteN:
// entry [high byte] gives an offset, entry [offset + high nibble of the low
// byte] gives a second offset, and entry [second offset + low nibble] is
// the result (high byte = lead byte if nonzero, low byte = the ANSI byte).
// An offset of 0 at either of the first two steps means "no translation".
// Initially points at the temporary table TmpUnicodeToAnsiTable.
//
PUSHORT NlsUnicodeToAnsiData = TmpUnicodeToAnsiTable; // Unicode to Ansi CP translation table
|
||
|
|
||
|
|
||
|
NTSTATUS
|
||
|
xxxRtlMultiByteToUnicodeN(
|
||
|
OUT PWCH UnicodeString,
|
||
|
OUT PULONG BytesInUnicodeString OPTIONAL,
|
||
|
IN PCH MultiByteString,
|
||
|
IN ULONG BytesInMultiByteString)
|
||
|
|
||
|
/*++
|
||
|
|
||
|
Routine Description:
|
||
|
|
||
|
This functions converts the specified ansi source string into a
|
||
|
Unicode string. The translation is done with respect to the
|
||
|
ANSI Code Page (ACP) installed at boot time. Single byte characters
|
||
|
in the range 0x00 - 0x7f are simply zero extended as a performance
|
||
|
enhancement. In some far eastern code pages 0x5c is defined as the
|
||
|
Yen sign. For system translation we always want to consider 0x5c
|
||
|
to be the backslash character. We get this for free by zero extending.
|
||
|
|
||
|
NOTE: This routine only supports precomposed Unicode characters.
|
||
|
|
||
|
Arguments:
|
||
|
|
||
|
UnicodeString - Returns a unicode string that is equivalent to
|
||
|
the ansi source string.
|
||
|
|
||
|
BytesInUnicodeString - Returns the number of bytes in the returned
|
||
|
unicode string pointed to by UnicodeString.
|
||
|
|
||
|
MultiByteString - Supplies the ansi source string that is to be
|
||
|
converted to unicode.
|
||
|
|
||
|
BytesInMultiByteString - The number of bytes in the string pointed to
|
||
|
by MultiByteString.
|
||
|
|
||
|
Return Value:
|
||
|
|
||
|
SUCCESS - The conversion was successful
|
||
|
|
||
|
|
||
|
--*/
|
||
|
|
||
|
{
|
||
|
UCHAR Entry;
|
||
|
PWCH UnicodeStringAnchor;
|
||
|
PUSHORT DBCSTable;
|
||
|
|
||
|
UnicodeStringAnchor = UnicodeString;
|
||
|
|
||
|
#ifdef DBCS
|
||
|
while (BytesInMultiByteString--) {
|
||
|
if ( IsDBCSFirst( *MultiByteString ) ) {
|
||
|
if (!BytesInMultiByteString) {
|
||
|
return STATUS_UNSUCCESSFUL;
|
||
|
}
|
||
|
*UnicodeString++ = sjtouni( (((USHORT)(*(PUCHAR)MultiByteString++)) << 8) +
|
||
|
(USHORT)(*(PUCHAR)MultiByteString++)
|
||
|
);
|
||
|
BytesInMultiByteString--;
|
||
|
} else {
|
||
|
*UnicodeString++ = sjtouni( *(PUCHAR)MultiByteString++ );
|
||
|
}
|
||
|
}
|
||
|
#else
|
||
|
if (NlsMbCodePageTag) {
|
||
|
//
|
||
|
// The ACP is a multibyte code page. Check each character
|
||
|
// to see if it is a lead byte before doing the translation.
|
||
|
//
|
||
|
while (BytesInMultiByteString--) {
|
||
|
if ( NlsLeadByteInfo[*MultiByteString]) {
|
||
|
//
|
||
|
// Lead byte - translate the trail byte using the table
|
||
|
// that corresponds to this lead byte. NOTE: make sure
|
||
|
// we have a trail byte to convert.
|
||
|
//
|
||
|
if (!BytesInMultiByteString) {
|
||
|
return STATUS_UNSUCCESSFUL;
|
||
|
}
|
||
|
Entry = NlsLeadByteInfo[*MultiByteString++];
|
||
|
DBCSTable = NlsMbCodePageTables[HINIBBLE(Entry)] + (LONIBBLE(Entry) * DBCS_TABLE_SIZE);
|
||
|
*UnicodeString++ = DBCSTable[*MultiByteString++];
|
||
|
BytesInMultiByteString--;
|
||
|
} else {
|
||
|
//
|
||
|
// Single byte character.
|
||
|
//
|
||
|
if (*MultiByteString & 0x80) {
|
||
|
*UnicodeString++ = NlsAnsiToUnicodeData[*MultiByteString++];
|
||
|
} else {
|
||
|
*UnicodeString++ = (WCHAR)*MultiByteString++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
//
|
||
|
// The ACP is a single byte code page.
|
||
|
//
|
||
|
while (BytesInMultiByteString--) {
|
||
|
if (*MultiByteString & 0x80) {
|
||
|
*UnicodeString++ = NlsAnsiToUnicodeData[*MultiByteString++];
|
||
|
} else {
|
||
|
*UnicodeString++ = (WCHAR)*MultiByteString++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
if (ARGUMENT_PRESENT(BytesInUnicodeString)) {
|
||
|
*BytesInUnicodeString = (ULONG)((PCH)UnicodeString - (PCH)UnicodeStringAnchor);
|
||
|
}
|
||
|
|
||
|
return STATUS_SUCCESS;
|
||
|
}
|
||
|
|
||
|
|
||
|
NTSTATUS
|
||
|
xxxRtlUnicodeToMultiByteN(
|
||
|
OUT PCH MultiByteString,
|
||
|
OUT PULONG BytesInMultiByteString OPTIONAL,
|
||
|
IN PWCH UnicodeString,
|
||
|
IN ULONG BytesInUnicodeString)
|
||
|
|
||
|
/*++
|
||
|
|
||
|
Routine Description:
|
||
|
|
||
|
This functions converts the specified unicode source string into an
|
||
|
ansi string. The translation is done with respect to the
|
||
|
ANSI Code Page (ACP) loaded at boot time.
|
||
|
|
||
|
Arguments:
|
||
|
|
||
|
MultiByteString - Returns an ansi string that is equivalent to the
|
||
|
unicode source string. If the translation can not be done
|
||
|
because a character in the unicode string does not map to an
|
||
|
ansi character in the ACP, an error is returned.
|
||
|
|
||
|
BytesInMultiByteString - Returns the number of bytes in the returned
|
||
|
ansi string pointed to by MultiByteString.
|
||
|
|
||
|
UnicodeString - Supplies the unicode source string that is to be
|
||
|
converted to ansi.
|
||
|
|
||
|
BytesInUnicodeString - The number of bytes in the the string pointed to by
|
||
|
UnicodeString.
|
||
|
|
||
|
Return Value:
|
||
|
|
||
|
SUCCESS - The conversion was successful
|
||
|
|
||
|
!SUCCESS - The conversion failed. A unicode character was encountered
|
||
|
that has no translation for the current ANSI Code Page (ACP).
|
||
|
|
||
|
--*/
|
||
|
|
||
|
{
|
||
|
USHORT Offset;
|
||
|
USHORT Entry;
|
||
|
ULONG CharsInUnicodeString;
|
||
|
PCH MultiByteStringAnchor;
|
||
|
|
||
|
MultiByteStringAnchor = MultiByteString;
|
||
|
|
||
|
/*
|
||
|
* convert from bytes to chars for easier loop handling.
|
||
|
*/
|
||
|
CharsInUnicodeString = BytesInUnicodeString / sizeof(WCHAR);
|
||
|
|
||
|
while (CharsInUnicodeString--) {
|
||
|
Offset = NlsUnicodeToAnsiData[HIBYTE(*UnicodeString)];
|
||
|
if (Offset != 0) {
|
||
|
Offset = NlsUnicodeToAnsiData[Offset + HINIBBLE(*UnicodeString)];
|
||
|
if (Offset != 0) {
|
||
|
Entry = NlsUnicodeToAnsiData[Offset + LONIBBLE(*UnicodeString)];
|
||
|
if (HIBYTE(Entry) != 0) {
|
||
|
*MultiByteString++ = HIBYTE(Entry); // lead byte
|
||
|
}
|
||
|
*MultiByteString++ = LOBYTE(Entry);
|
||
|
} else {
|
||
|
//
|
||
|
// no translation for this Unicode character. Return
|
||
|
// an error.
|
||
|
//
|
||
|
#ifdef DBCS // RtlUnicodeToMultiByteN : temporary hack to avoid error return
|
||
|
if ( *UnicodeString <= (WCHAR)0xff )
|
||
|
*MultiByteString++ = (UCHAR)*UnicodeString;
|
||
|
else
|
||
|
*MultiByteString++ = '\x20';
|
||
|
#else
|
||
|
return STATUS_UNSUCCESSFUL;
|
||
|
#endif
|
||
|
}
|
||
|
} else {
|
||
|
//
|
||
|
// no translation for this Unicode character. Return an error.
|
||
|
//
|
||
|
#ifdef DBCS // RtlUnicodeToMultiByteN : temporary hack to avoid error return
|
||
|
if ( *UnicodeString <= (WCHAR)0xff )
|
||
|
*MultiByteString++ = (UCHAR)*UnicodeString;
|
||
|
else
|
||
|
*MultiByteString++ = '\x20';
|
||
|
#else
|
||
|
return STATUS_UNSUCCESSFUL;
|
||
|
#endif
|
||
|
}
|
||
|
UnicodeString++;
|
||
|
}
|
||
|
|
||
|
if (ARGUMENT_PRESENT(BytesInMultiByteString)) {
|
||
|
*BytesInMultiByteString = (ULONG)(MultiByteString - MultiByteStringAnchor);
|
||
|
}
|
||
|
|
||
|
return STATUS_SUCCESS;
|
||
|
}
|