/*++

Copyright (c) 1997-2001  Microsoft Corporation

Module Name:

    utf8.c

Abstract:

    Domain Name System (DNS) Library

    UTF8 to\from unicode and ANSI conversions

    The UTF8\unicode routines are similar to the generic ones floating
    around the NT group, but a heck of a lot cleaner and more robust,
    including catching the invalid UTF8 string case on the utf8 to unicode
    conversion.

    The UTF8\ANSI routines are optimized for the 99% case where all the
    characters are <128 and no conversion is actually required.

Author:

    Jim Gilroy (jamesg)     March 1997

Revision History:

--*/
|
||
|
||
|
||
#include "local.h"
|
||
|
||
|
||
//
//  UTF8 encoding helpers
//
//  Marker bits carried by the lead byte of a multi-byte sequence
//  and by every trail byte.
//

#define UTF8_1ST_OF_2           0xc0        // 110x xxxx
#define UTF8_1ST_OF_3           0xe0        // 1110 xxxx
#define UTF8_1ST_OF_4           0xf0        // 1111 0xxx
#define UTF8_TRAIL              0x80        // 10xx xxxx

//  largest unicode character representable in two byte UTF8

#define UTF8_2_MAX              0x07ff

//  single bit tests -- non-zero when the bit is set

#define BIT7(ch)                ((ch) & 0x80)
#define BIT6(ch)                ((ch) & 0x40)
#define BIT5(ch)                ((ch) & 0x20)
#define BIT4(ch)                ((ch) & 0x10)
#define BIT3(ch)                ((ch) & 0x08)

//  payload bit masks

#define LOW6BITS(ch)            ((ch) & 0x3f)
#define LOW5BITS(ch)            ((ch) & 0x1f)
#define LOW4BITS(ch)            ((ch) & 0x0f)

#define HIGHBYTE(wch)           ((wch) & 0xff00)

//
//  Surrogate pair support
//
//  Two unicode characters may be linked to form a surrogate pair.
//  A pair travels in UTF8 as a single four byte sequence rather
//  than as two three byte sequences (six bytes).
//

#define HIGH_SURROGATE_START    0xd800
#define HIGH_SURROGATE_END      0xdbff
#define LOW_SURROGATE_START     0xdc00
#define LOW_SURROGATE_END       0xdfff

//
//  Max "normal conversion", make space for MAX_PATH,
//  this covers all valid DNS names and strings.
//

#define TEMP_BUFFER_LENGTH      (2*MAX_PATH)
|
||
|
||
|
||
|
||
DNS_STATUS
|
||
_fastcall
|
||
Dns_ValidateUtf8Byte(
|
||
IN BYTE chUtf8,
|
||
IN OUT PDWORD pdwTrailCount
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Verifies that byte is valid UTF8 byte.
|
||
|
||
Arguments:
|
||
|
||
Return Value:
|
||
|
||
ERROR_SUCCESS -- if valid UTF8 given trail count
|
||
ERROR_INVALID_DATA -- if invalid
|
||
|
||
--*/
|
||
{
|
||
DWORD trailCount = *pdwTrailCount;
|
||
|
||
DNSDBG( TRACE, ( "Dns_ValidateUtf8Byte()\n" ));
|
||
|
||
//
|
||
// if ASCII byte, only requirement is no trail count
|
||
//
|
||
|
||
if ( (UCHAR)chUtf8 < 0x80 )
|
||
{
|
||
if ( trailCount == 0 )
|
||
{
|
||
return( ERROR_SUCCESS );
|
||
}
|
||
return( ERROR_INVALID_DATA );
|
||
}
|
||
|
||
//
|
||
// trail byte
|
||
// - must be in multi-byte set
|
||
//
|
||
|
||
if ( BIT6(chUtf8) == 0 )
|
||
{
|
||
if ( trailCount == 0 )
|
||
{
|
||
return( ERROR_INVALID_DATA );
|
||
}
|
||
--trailCount;
|
||
}
|
||
|
||
//
|
||
// multi-byte lead byte
|
||
// - must NOT be in existing multi-byte set
|
||
// - verify valid lead byte
|
||
|
||
else
|
||
{
|
||
if ( trailCount != 0 )
|
||
{
|
||
return( ERROR_INVALID_DATA );
|
||
}
|
||
|
||
// first of two bytes (110xxxxx)
|
||
|
||
if ( BIT5(chUtf8) == 0 )
|
||
{
|
||
trailCount = 1;
|
||
}
|
||
|
||
// first of three bytes (1110xxxx)
|
||
|
||
else if ( BIT4(chUtf8) == 0 )
|
||
{
|
||
trailCount = 2;
|
||
}
|
||
|
||
// first of four bytes (surrogate character) (11110xxx)
|
||
|
||
else if ( BIT3(chUtf8) == 0 )
|
||
{
|
||
trailCount = 3;
|
||
}
|
||
|
||
else
|
||
{
|
||
return( ERROR_INVALID_DATA );
|
||
}
|
||
}
|
||
|
||
// reset caller's trail count
|
||
|
||
*pdwTrailCount = trailCount;
|
||
return( ERROR_SUCCESS );
|
||
}
|
||
|
||
|
||
|
||
//
//  UTF8 to unicode conversions
//
//  For some reason UTF8 is not supported in Win9x.
//  AND the implementation itself is not careful about
//  validating UTF8.
//
|
||
|
||
DWORD
|
||
_fastcall
|
||
Dns_UnicodeToUtf8(
|
||
IN PWCHAR pwUnicode,
|
||
IN DWORD cchUnicode,
|
||
OUT PCHAR pchResult,
|
||
IN DWORD cchResult
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Convert unicode characters to UTF8.
|
||
|
||
Result is NULL terminated if sufficient space in result
|
||
buffer is available.
|
||
|
||
Arguments:
|
||
|
||
pwUnicode -- ptr to start of unicode buffer
|
||
|
||
cchUnicode -- length of unicode buffer
|
||
|
||
pchResult -- ptr to start of result buffer for UTF8 chars
|
||
|
||
cchResult -- length of result buffer
|
||
|
||
Return Value:
|
||
|
||
Count of UTF8 characters in result, if successful.
|
||
0 on error. GetLastError() has error code.
|
||
|
||
--*/
|
||
{
|
||
WCHAR wch; // current unicode character being converted
|
||
DWORD lengthUtf8 = 0; // length of UTF8 result string
|
||
WORD lowSurrogate;
|
||
DWORD surrogateDword;
|
||
|
||
|
||
DNSDBG( TRACE, (
|
||
"Dns_UnicodeToUtf8( %.*S )\n",
|
||
cchUnicode,
|
||
pwUnicode ));
|
||
|
||
//
|
||
// loop converting unicode chars until run out or error
|
||
//
|
||
|
||
while ( cchUnicode-- )
|
||
{
|
||
wch = *pwUnicode++;
|
||
|
||
//
|
||
// ASCII character (7 bits or less) -- converts to directly
|
||
//
|
||
|
||
if ( wch < 0x80 )
|
||
{
|
||
lengthUtf8++;
|
||
|
||
if ( pchResult )
|
||
{
|
||
if ( lengthUtf8 >= cchResult )
|
||
{
|
||
goto OutOfBuffer;
|
||
}
|
||
*pchResult++ = (CHAR)wch;
|
||
}
|
||
continue;
|
||
}
|
||
|
||
//
|
||
// wide character less than 0x07ff (11bits) converts to two bytes
|
||
// - upper 5 bits in first byte
|
||
// - lower 6 bits in secondar byte
|
||
//
|
||
|
||
else if ( wch <= UTF8_2_MAX )
|
||
{
|
||
lengthUtf8 += 2;
|
||
|
||
if ( pchResult )
|
||
{
|
||
if ( lengthUtf8 >= cchResult )
|
||
{
|
||
goto OutOfBuffer;
|
||
}
|
||
*pchResult++ = UTF8_1ST_OF_2 | wch >> 6;
|
||
*pchResult++ = UTF8_TRAIL | LOW6BITS( (UCHAR)wch );
|
||
}
|
||
continue;
|
||
}
|
||
|
||
//
|
||
// surrogate pair
|
||
// - if have high surrogate followed by low surrogate then
|
||
// process as surrogate pair
|
||
// - otherwise treat character as ordinary unicode "three-byte"
|
||
// character, by falling through to below
|
||
//
|
||
|
||
else if ( wch >= HIGH_SURROGATE_START &&
|
||
wch <= HIGH_SURROGATE_END &&
|
||
cchUnicode &&
|
||
(lowSurrogate = *pwUnicode) &&
|
||
lowSurrogate >= LOW_SURROGATE_START &&
|
||
lowSurrogate <= LOW_SURROGATE_END )
|
||
{
|
||
// have a surrogate pair
|
||
// - suck up next unicode character (low surrogate of pair)
|
||
// - make full DWORD surrogate pair
|
||
// - then lay out four UTF8 bytes
|
||
// 1st of four, then three trail bytes
|
||
// 0x1111xxxx
|
||
// 0x10xxxxxx
|
||
// 0x10xxxxxx
|
||
// 0x10xxxxxx
|
||
|
||
DNSDBG( TRACE, (
|
||
"Have surrogate pair %hx : %hx\n",
|
||
wch,
|
||
lowSurrogate ));
|
||
|
||
pwUnicode++;
|
||
cchUnicode--;
|
||
lengthUtf8 += 4;
|
||
|
||
if ( pchResult )
|
||
{
|
||
if ( lengthUtf8 >= cchResult )
|
||
{
|
||
goto OutOfBuffer;
|
||
}
|
||
surrogateDword = (((wch-0xD800) << 10) + (lowSurrogate - 0xDC00) + 0x10000);
|
||
|
||
*pchResult++ = UTF8_1ST_OF_4 | (UCHAR) (surrogateDword >> 18);
|
||
*pchResult++ = UTF8_TRAIL | (UCHAR) LOW6BITS(surrogateDword >> 12);
|
||
*pchResult++ = UTF8_TRAIL | (UCHAR) LOW6BITS(surrogateDword >> 6);
|
||
*pchResult++ = UTF8_TRAIL | (UCHAR) LOW6BITS(surrogateDword);
|
||
|
||
DNSDBG( TRACE, (
|
||
"Converted surrogate -- DWORD = %08x\n"
|
||
"\tconverted %x %x %x %x\n",
|
||
surrogateDword,
|
||
(UCHAR) *(pchResult-3),
|
||
(UCHAR) *(pchResult-2),
|
||
(UCHAR) *(pchResult-1),
|
||
(UCHAR) *pchResult ));
|
||
}
|
||
}
|
||
|
||
//
|
||
// wide character (non-zero in top 5 bits) converts to three bytes
|
||
// - top 4 bits in first byte
|
||
// - middle 6 bits in second byte
|
||
// - low 6 bits in third byte
|
||
//
|
||
|
||
else
|
||
{
|
||
lengthUtf8 += 3;
|
||
|
||
if ( pchResult )
|
||
{
|
||
if ( lengthUtf8 >= cchResult )
|
||
{
|
||
goto OutOfBuffer;
|
||
}
|
||
*pchResult++ = UTF8_1ST_OF_3 | (wch >> 12);
|
||
*pchResult++ = UTF8_TRAIL | LOW6BITS( wch >> 6 );
|
||
*pchResult++ = UTF8_TRAIL | LOW6BITS( wch );
|
||
}
|
||
}
|
||
}
|
||
|
||
//
|
||
// NULL terminate buffer
|
||
// return UTF8 character count
|
||
//
|
||
|
||
if ( pchResult && lengthUtf8 < cchResult )
|
||
{
|
||
*pchResult = 0;
|
||
}
|
||
return( lengthUtf8 );
|
||
|
||
OutOfBuffer:
|
||
|
||
SetLastError( ERROR_INSUFFICIENT_BUFFER );
|
||
return( 0 );
|
||
}
|
||
|
||
|
||
|
||
|
||
DWORD
|
||
_fastcall
|
||
Dns_Utf8ToUnicode(
|
||
IN PCHAR pchUtf8,
|
||
IN DWORD cchUtf8,
|
||
OUT PWCHAR pwResult,
|
||
IN DWORD cwResult
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Convert UTF8 characters to unicode.
|
||
|
||
Result is NULL terminated if sufficient space in result
|
||
buffer is available.
|
||
|
||
Arguments:
|
||
|
||
pwResult -- ptr to start of result buffer for unicode chars
|
||
|
||
cwResult -- length of result buffer in WCHAR
|
||
|
||
pwUtf8 -- ptr to start of UTF8 buffer
|
||
|
||
cchUtf8 -- length of UTF8 buffer
|
||
|
||
Return Value:
|
||
|
||
Count of unicode characters in result, if successful.
|
||
0 on error. GetLastError() has error code.
|
||
|
||
--*/
|
||
{
|
||
CHAR ch; // current UTF8 character
|
||
WCHAR wch; // current unicode character
|
||
DWORD trailCount = 0; // count of UTF8 trail bytes to follow
|
||
DWORD lengthUnicode = 0; // length of unicode result string
|
||
BOOL bsurrogatePair = FALSE;
|
||
DWORD surrogateDword;
|
||
|
||
|
||
//
|
||
// loop converting UTF8 chars until run out or error
|
||
//
|
||
|
||
while ( cchUtf8-- )
|
||
{
|
||
ch = *pchUtf8++;
|
||
|
||
//
|
||
// ASCII character -- just copy
|
||
//
|
||
|
||
if ( BIT7(ch) == 0 )
|
||
{
|
||
lengthUnicode++;
|
||
if ( pwResult )
|
||
{
|
||
if ( lengthUnicode >= cwResult )
|
||
{
|
||
goto OutOfBuffer;
|
||
}
|
||
*pwResult++ = (WCHAR)ch;
|
||
}
|
||
continue;
|
||
}
|
||
|
||
//
|
||
// UTF8 trail byte
|
||
// - if not expected, error
|
||
// - otherwise shift unicode character 6 bits and
|
||
// copy in lower six bits of UTF8
|
||
// - if last UTF8 byte, copy result to unicode string
|
||
//
|
||
|
||
else if ( BIT6(ch) == 0 )
|
||
{
|
||
if ( trailCount == 0 )
|
||
{
|
||
goto InvalidUtf8;
|
||
}
|
||
|
||
if ( !bsurrogatePair )
|
||
{
|
||
wch <<= 6;
|
||
wch |= LOW6BITS( ch );
|
||
|
||
if ( --trailCount == 0 )
|
||
{
|
||
lengthUnicode++;
|
||
if ( pwResult )
|
||
{
|
||
if ( lengthUnicode >= cwResult )
|
||
{
|
||
goto OutOfBuffer;
|
||
}
|
||
*pwResult++ = wch;
|
||
}
|
||
}
|
||
continue;
|
||
}
|
||
|
||
// surrogate pair
|
||
// - same as above EXCEPT build two unicode chars
|
||
// from surrogateDword
|
||
|
||
else
|
||
{
|
||
surrogateDword <<= 6;
|
||
surrogateDword |= LOW6BITS( ch );
|
||
|
||
if ( --trailCount == 0 )
|
||
{
|
||
lengthUnicode += 2;
|
||
|
||
if ( pwResult )
|
||
{
|
||
if ( lengthUnicode >= cwResult )
|
||
{
|
||
goto OutOfBuffer;
|
||
}
|
||
surrogateDword -= 0x10000;
|
||
*pwResult++ = (WCHAR) ((surrogateDword >> 10) + HIGH_SURROGATE_START);
|
||
*pwResult++ = (WCHAR) ((surrogateDword & 0x3ff) + LOW_SURROGATE_START);
|
||
}
|
||
bsurrogatePair = FALSE;
|
||
}
|
||
}
|
||
|
||
}
|
||
|
||
//
|
||
// UTF8 lead byte
|
||
// - if currently in extension, error
|
||
|
||
else
|
||
{
|
||
if ( trailCount != 0 )
|
||
{
|
||
goto InvalidUtf8;
|
||
}
|
||
|
||
// first of two byte character (110xxxxx)
|
||
|
||
if ( BIT5(ch) == 0 )
|
||
{
|
||
trailCount = 1;
|
||
wch = LOW5BITS(ch);
|
||
continue;
|
||
}
|
||
|
||
// first of three byte character (1110xxxx)
|
||
|
||
else if ( BIT4(ch) == 0 )
|
||
{
|
||
trailCount = 2;
|
||
wch = LOW4BITS(ch);
|
||
continue;
|
||
}
|
||
|
||
// first of four byte surrogate pair (11110xxx)
|
||
|
||
else if ( BIT3(ch) == 0 )
|
||
{
|
||
trailCount = 3;
|
||
surrogateDword = LOW4BITS(ch);
|
||
bsurrogatePair = TRUE;
|
||
}
|
||
|
||
else
|
||
{
|
||
goto InvalidUtf8;
|
||
}
|
||
}
|
||
}
|
||
|
||
// catch if hit end in the middle of UTF8 multi-byte character
|
||
|
||
if ( trailCount )
|
||
{
|
||
goto InvalidUtf8;
|
||
}
|
||
|
||
//
|
||
// NULL terminate buffer
|
||
// return the number of Unicode characters written.
|
||
//
|
||
|
||
if ( pwResult && lengthUnicode < cwResult )
|
||
{
|
||
*pwResult = 0;
|
||
}
|
||
return( lengthUnicode );
|
||
|
||
OutOfBuffer:
|
||
|
||
SetLastError( ERROR_INSUFFICIENT_BUFFER );
|
||
return( 0 );
|
||
|
||
InvalidUtf8:
|
||
|
||
SetLastError( ERROR_INVALID_DATA );
|
||
return( 0 );
|
||
}
|
||
|
||
|
||
|
||
|
||
//
//  UTF8 \ ANSI conversions
//
|
||
|
||
DWORD
|
||
Dns_Utf8ToOrFromAnsi(
|
||
OUT PCHAR pchResult,
|
||
IN DWORD cchResult,
|
||
IN PCHAR pchIn,
|
||
IN DWORD cchIn,
|
||
IN DNS_CHARSET InCharSet,
|
||
IN DNS_CHARSET OutCharSet
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Convert UTF8 characters to ANSI or vice versa.
|
||
|
||
Note: this function appears to call string functions (string.c)
|
||
which call back to it. However, this calls those functions
|
||
ONLY for conversions to\from unicode which do NOT call back
|
||
to these functions. Ultimately need to check if LCMapString
|
||
can handle these issues.
|
||
|
||
Arguments:
|
||
|
||
pchResult -- ptr to start of result buffer for ansi chars
|
||
|
||
cchResult -- length of result buffer
|
||
|
||
pchIn -- ptr to start of input string
|
||
|
||
cchIn -- length of input string
|
||
|
||
InCharSet -- char set of input string (DnsCharSetAnsi or DnsCharSetUtf8)
|
||
|
||
OutCharSet -- char set for result string (DnsCharSetUtf8 or DnsCharSetAnsi)
|
||
|
||
Return Value:
|
||
|
||
Count of bytes in result (including terminating NULL).
|
||
0 on error. GetLastError() has error code.
|
||
|
||
--*/
|
||
{
|
||
DWORD unicodeLength;
|
||
DWORD resultLength;
|
||
CHAR tempBuffer[ TEMP_BUFFER_LENGTH ];
|
||
PCHAR ptemp = tempBuffer;
|
||
DNS_STATUS status;
|
||
|
||
DNSDBG( TRACE, (
|
||
"Dns_Utf8ToOrFromAnsi()\n"
|
||
"\tbuffer = %p\n"
|
||
"\tbuf length = %d\n"
|
||
"\tpchString = %p (%*s)\n"
|
||
"\tcchString = %d\n"
|
||
"\tCharSetIn = %d\n"
|
||
"\tCharSetOut = %d\n",
|
||
pchResult,
|
||
cchResult,
|
||
pchIn,
|
||
cchIn, pchIn,
|
||
cchIn,
|
||
InCharSet,
|
||
OutCharSet ));
|
||
|
||
//
|
||
// validate charsets
|
||
//
|
||
|
||
ASSERT( InCharSet != OutCharSet );
|
||
ASSERT( InCharSet == DnsCharSetAnsi || InCharSet == DnsCharSetUtf8 );
|
||
ASSERT( OutCharSet == DnsCharSetAnsi || OutCharSet == DnsCharSetUtf8 );
|
||
|
||
//
|
||
// if length not given, calculate
|
||
//
|
||
|
||
if ( cchIn == 0 )
|
||
{
|
||
cchIn = strlen( pchIn );
|
||
}
|
||
|
||
//
|
||
// string completely ASCII
|
||
// - simple memcopy suffices
|
||
// - note result must have terminating NULL
|
||
//
|
||
|
||
if ( Dns_IsStringAsciiEx(
|
||
pchIn,
|
||
cchIn ) )
|
||
{
|
||
if ( !pchResult )
|
||
{
|
||
return( cchIn + 1 );
|
||
}
|
||
|
||
if ( cchResult <= cchIn )
|
||
{
|
||
status = ERROR_INSUFFICIENT_BUFFER;
|
||
goto Failed;
|
||
}
|
||
memcpy(
|
||
pchResult,
|
||
pchIn,
|
||
cchIn );
|
||
|
||
pchResult[ cchIn ] = 0;
|
||
|
||
return( cchIn+1 );
|
||
}
|
||
|
||
//
|
||
// non-ASCII
|
||
// - convert to unicode, then to result character set
|
||
//
|
||
// DCR_PERF: LCMapStringA() might be able to handle all this
|
||
// haven't figured out how yet
|
||
//
|
||
|
||
unicodeLength = Dns_GetBufferLengthForStringCopy(
|
||
pchIn,
|
||
cchIn,
|
||
InCharSet,
|
||
DnsCharSetUnicode
|
||
);
|
||
|
||
if ( unicodeLength > TEMP_BUFFER_LENGTH )
|
||
{
|
||
// can't use static buffer, must allocate
|
||
|
||
ptemp = Dns_StringCopyAllocate(
|
||
pchIn,
|
||
cchIn,
|
||
InCharSet,
|
||
DnsCharSetUnicode
|
||
);
|
||
if ( !ptemp )
|
||
{
|
||
status = ERROR_INVALID_DATA;
|
||
goto Failed;
|
||
}
|
||
}
|
||
else
|
||
{
|
||
if ( unicodeLength == 0 )
|
||
{
|
||
status = ERROR_INVALID_DATA;
|
||
goto Failed;
|
||
}
|
||
|
||
// copy into temporary buffer
|
||
|
||
resultLength = Dns_StringCopy(
|
||
ptemp,
|
||
NULL, // adequate buffer length
|
||
pchIn,
|
||
cchIn,
|
||
InCharSet,
|
||
DnsCharSetUnicode
|
||
);
|
||
if ( !resultLength )
|
||
{
|
||
status = ERROR_INVALID_DATA;
|
||
goto Failed;
|
||
}
|
||
ASSERT( resultLength == unicodeLength );
|
||
}
|
||
|
||
//
|
||
// conversion to result char set
|
||
// - if have result buffer, convert into it
|
||
// - should have at least ONE two byte character
|
||
// otherwise should have taken fast path above
|
||
//
|
||
|
||
if ( pchResult )
|
||
{
|
||
resultLength = Dns_StringCopy(
|
||
pchResult,
|
||
& cchResult, // result buffer length
|
||
ptemp,
|
||
0,
|
||
DnsCharSetUnicode,
|
||
OutCharSet
|
||
);
|
||
if ( resultLength == 0 )
|
||
{
|
||
status = ERROR_INSUFFICIENT_BUFFER;
|
||
goto Failed;
|
||
}
|
||
ASSERT( resultLength <= cchResult );
|
||
ASSERT( pchResult[resultLength-1] == 0 );
|
||
ASSERT( resultLength >= unicodeLength/2 );
|
||
}
|
||
|
||
else
|
||
{
|
||
resultLength = Dns_GetBufferLengthForStringCopy(
|
||
ptemp,
|
||
0,
|
||
DnsCharSetUnicode,
|
||
OutCharSet
|
||
);
|
||
ASSERT( resultLength >= unicodeLength/2 );
|
||
}
|
||
|
||
//
|
||
// final mapping from unicode to result character set
|
||
//
|
||
|
||
if ( ptemp != tempBuffer )
|
||
{
|
||
FREE_HEAP( ptemp );
|
||
}
|
||
|
||
return( resultLength );
|
||
|
||
|
||
Failed:
|
||
|
||
SetLastError( status );
|
||
|
||
if ( ptemp != tempBuffer )
|
||
{
|
||
FREE_HEAP( ptemp );
|
||
}
|
||
|
||
return( 0 );
|
||
}
|
||
|
||
|
||
|
||
DWORD
|
||
Dns_AnsiToUtf8(
|
||
IN PCHAR pchAnsi,
|
||
IN DWORD cchAnsi,
|
||
OUT PCHAR pchResult,
|
||
IN DWORD cchResult
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Convert ANSI characters to UTF8.
|
||
|
||
Arguments:
|
||
|
||
pchAnsi -- ptr to start of ansi buffer
|
||
|
||
cchAnsi -- length of ansi buffer
|
||
|
||
pchResult -- ptr to start of result buffer for UTF8 chars
|
||
|
||
cchResult -- length of result buffer
|
||
|
||
Return Value:
|
||
|
||
Count of UTF8 characters in result, if successful.
|
||
0 on error. GetLastError() has error code.
|
||
|
||
--*/
|
||
{
|
||
return Dns_Utf8ToOrFromAnsi(
|
||
pchResult, // result buffer
|
||
cchResult,
|
||
pchAnsi, // in string
|
||
cchAnsi,
|
||
DnsCharSetAnsi, // ANSI in
|
||
DnsCharSetUtf8 // UTF8 out
|
||
);
|
||
}
|
||
|
||
|
||
|
||
DWORD
|
||
Dns_Utf8ToAnsi(
|
||
IN PCHAR pchUtf8,
|
||
IN DWORD cchUtf8,
|
||
OUT PCHAR pchResult,
|
||
IN DWORD cchResult
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Convert UTF8 characters to ANSI.
|
||
|
||
Arguments:
|
||
|
||
pchResult -- ptr to start of result buffer for ansi chars
|
||
|
||
cchResult -- length of result buffer
|
||
|
||
pwUtf8 -- ptr to start of UTF8 buffer
|
||
|
||
cchUtf8 -- length of UTF8 buffer
|
||
|
||
Return Value:
|
||
|
||
Count of ansi characters in result, if successful.
|
||
0 on error. GetLastError() has error code.
|
||
|
||
--*/
|
||
{
|
||
return Dns_Utf8ToOrFromAnsi(
|
||
pchResult, // result buffer
|
||
cchResult,
|
||
pchUtf8, // in string
|
||
cchUtf8,
|
||
DnsCharSetUtf8, // UTF8 in
|
||
DnsCharSetAnsi // ANSI out
|
||
);
|
||
}
|
||
|
||
|
||
|
||
BOOL
|
||
_fastcall
|
||
Dns_IsStringAscii(
|
||
IN LPSTR pszString
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Check if string is ASCII.
|
||
|
||
This is equivalent to saying
|
||
- is ANSI string already in UTF8
|
||
or
|
||
- is UTF8 string already in ANSI
|
||
|
||
This allows you to optimize for the 99% case where just
|
||
passing ASCII strings.
|
||
|
||
Arguments:
|
||
|
||
pszString -- ANSI or UTF8 string to check for ASCIIhood
|
||
|
||
Return Value:
|
||
|
||
TRUE if string is all ASCII (characters all < 128)
|
||
FALSE if non-ASCII characters.
|
||
|
||
--*/
|
||
{
|
||
register UCHAR ch;
|
||
|
||
//
|
||
// loop through until hit non-ASCII character
|
||
//
|
||
|
||
while ( ch = (UCHAR) *pszString++ )
|
||
{
|
||
if ( ch < 0x80 )
|
||
{
|
||
continue;
|
||
}
|
||
return( FALSE );
|
||
}
|
||
|
||
return( TRUE );
|
||
}
|
||
|
||
|
||
|
||
BOOL
|
||
_fastcall
|
||
Dns_IsStringAsciiEx(
|
||
IN PCHAR pchString,
|
||
IN DWORD cchString
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Check if ANSI (or UTF8) string is ASCII.
|
||
|
||
This is equivalent to saying
|
||
- is ANSI string already in UTF8
|
||
or
|
||
- is UTF8 string already in ANSI
|
||
|
||
This allows you to optimize for the 99% case where just
|
||
passing ASCII strings.
|
||
|
||
Arguments:
|
||
|
||
pchString -- ptr to start of ansi buffer
|
||
|
||
cchString -- length of ansi buffer
|
||
|
||
Return Value:
|
||
|
||
TRUE if string is all ASCII (characters all < 128)
|
||
FALSE if non-ASCII characters.
|
||
|
||
--*/
|
||
{
|
||
//
|
||
// loop through until hit non-ASCII character
|
||
//
|
||
|
||
while ( cchString-- )
|
||
{
|
||
if ( (UCHAR)*pchString++ < 0x80 )
|
||
{
|
||
continue;
|
||
}
|
||
return( FALSE );
|
||
}
|
||
|
||
return( TRUE );
|
||
}
|
||
|
||
|
||
|
||
BOOL
|
||
_fastcall
|
||
Dns_IsWideStringAscii(
|
||
IN PWCHAR pwszString
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Check if unicode string is ASCII.
|
||
This means all characters < 128.
|
||
|
||
Strings without extended characters need NOT be downcased
|
||
on the wire. This allows us to optimize for the 99% case
|
||
where just passing ASCII strings.
|
||
|
||
Arguments:
|
||
|
||
pwszString -- ptr to unicode string
|
||
|
||
Return Value:
|
||
|
||
TRUE if string is all ASCII (characters all < 128)
|
||
FALSE if non-ASCII characters.
|
||
|
||
--*/
|
||
{
|
||
register USHORT ch;
|
||
|
||
//
|
||
// loop through until hit non-ASCII character
|
||
//
|
||
|
||
while ( ch = (USHORT) *pwszString++ )
|
||
{
|
||
if ( ch < 0x80 )
|
||
{
|
||
continue;
|
||
}
|
||
return( FALSE );
|
||
}
|
||
|
||
return( TRUE );
|
||
}
|
||
|
||
//
//  End utf8.c
//
|