windows-nt/Source/XPSP1/NT/enduser/stuff/itss/utf8.cpp
2020-09-26 16:20:57 +08:00

221 lines
6.8 KiB
C++

// UTF8.CPP -- Implementation of the Unicode to/from UTF8 conversion routines
#include "stdafx.h"
// Records iError as the calling thread's last-error code (via SetLastError)
// and returns 0, so a conversion routine can report failure with a single
// "return FailWith(code);" statement.
inline INT FailWith(INT iError)
{
    const INT iFailureResult = 0;   // The conversion routines return 0 on failure.

    SetLastError(iError);

    return iFailureResult;
}
int WideCharToUTF8(LPCWSTR lpWideCharStr, int cchWideChar,
                   LPSTR lpMultiByteStr, int cchMultiByte
                  )
{
    // Converts a sequence of Unicode (WCHAR) characters to UTF-8.
    //
    //   lpWideCharStr  -- the characters to convert.
    //   cchWideChar    -- the number of WCHARs to convert, or -1 to convert a
    //                     null terminated string including its terminator.
    //   lpMultiByteStr -- receives the UTF-8 bytes.
    //   cchMultiByte   -- the size of lpMultiByteStr in bytes. Zero means
    //                     "just count"; nothing is stored in that case.
    //
    // Each WCHAR is encoded on its own as one, two, or three bytes.
    //
    // The result is the number of UTF-8 bytes needed for the input. On failure
    // the result is zero and the last-error code is ERROR_INVALID_PARAMETER or
    // ERROR_INSUFFICIENT_BUFFER. When the buffer runs out, the bytes stored
    // before that point remain in the buffer.

    if (   cchWideChar  < -1
        || cchMultiByte <  0
        || PBYTE(lpWideCharStr) == PBYTE(lpMultiByteStr)
       )
        return FailWith(ERROR_INVALID_PARAMETER);

    if (cchWideChar == -1)
        cchWideChar = wcsLen(lpWideCharStr) + 1;   // +1 converts the terminator too.

    const BOOL fWriting = cchMultiByte > 0;   // FALSE => counting pass only
    int        cbTotal  = 0;                  // UTF-8 bytes required so far

    while (cchWideChar--)
    {
        const WCHAR wch = *lpWideCharStr++;

        // Encoded size:  0x0000 - 0x007F => 1 byte
        //                0x0080 - 0x07FF => 2 bytes
        //                0x0800 - 0xFFFF => 3 bytes
        const int cbChar = (wch < 0x0080) ? 1
                         : (wch < 0x0800) ? 2
                         :                  3;

        cbTotal += cbChar;

        if (!fWriting)
            continue;

        if (cchMultiByte < cbChar)
            return FailWith(ERROR_INSUFFICIENT_BUFFER);

        cchMultiByte -= cbChar;

        switch (cbChar)
        {
        case 1:     // 0xxxxxxx
            *lpMultiByteStr++ = BYTE(wch);
            break;

        case 2:     // 110xxxxx 10xxxxxx
            *lpMultiByteStr++ = 0xC0 | (wch >> 6);
            *lpMultiByteStr++ = 0x80 | (wch & 0x3F);
            break;

        default:    // 1110xxxx 10xxxxxx 10xxxxxx
            *lpMultiByteStr++ = 0xE0 | ( wch >> 12);
            *lpMultiByteStr++ = 0x80 | ((wch >>  6) & 0x3F);
            *lpMultiByteStr++ = 0x80 | ( wch        & 0x3F);
            break;
        }
    }

    return cbTotal;
}
int UTF8ToWideChar(LPCSTR lpMultiByteStr, int cchMultiByte,
                   LPWSTR lpWideCharStr, int cchWideChar
                  )
{
    // Converts a UTF-8 byte sequence to Unicode (WCHAR) characters.
    //
    //   lpMultiByteStr -- the UTF-8 bytes to convert.
    //   cchMultiByte   -- the number of bytes to convert, or -1 to convert a
    //                     null terminated string including its terminator.
    //   lpWideCharStr  -- receives the Unicode characters.
    //   cchWideChar    -- the size of lpWideCharStr in WCHARs. Zero means
    //                     "just count"; nothing is stored in that case.
    //
    // Only one-, two-, and three-byte UTF-8 sequences are accepted; each one
    // produces exactly one WCHAR.
    //
    // The result is the number of WCHARs needed for the input. On failure the
    // result is zero and the last-error code is one of:
    //
    //   ERROR_INVALID_PARAMETER      -- bad counts, or source == destination.
    //   ERROR_NO_UNICODE_TRANSLATION -- malformed or unsupported UTF-8.
    //   ERROR_INSUFFICIENT_BUFFER    -- lpWideCharStr is too small. Characters
    //                                   stored before that point remain.

    if (   PBYTE(lpWideCharStr) == PBYTE(lpMultiByteStr)
        || cchMultiByte < -1
        || cchWideChar  <  0
       )
        return FailWith(ERROR_INVALID_PARAMETER);

    if (cchMultiByte == -1) // -1 means lpMultiByteStr is null terminated
        cchMultiByte = lstrlenA(lpMultiByteStr) + 1;

    int  cwcNecessary = 0;               // Number of Unicode characters necessary
                                         // to represent the UTF8 sequence.
    BOOL fStoring     = cchWideChar > 0; // FALSE => counting pass only

    for (; cchMultiByte--; cwcNecessary++)
    {
        BYTE  b = *lpMultiByteStr++;
        WCHAR wc;

        if (b < 0x80)       // An ASCII character
        {
            wc = WCHAR(b);
        }
        else if (b < 0xC0)  // Trailing byte without a preceding lead byte
        {
            return FailWith(ERROR_NO_UNICODE_TRANSLATION);
        }
        else if (b < 0xE0)  // First byte of a two-byte code
        {
            if (cchMultiByte <= 0)   // Do we have a second byte?
                return FailWith(ERROR_NO_UNICODE_TRANSLATION);

            cchMultiByte--;

            BYTE b2 = *lpMultiByteStr++;

            if ((b2 & 0xC0) != 0x80) // Trailing byte must have the form 10xxxxxx
                return FailWith(ERROR_NO_UNICODE_TRANSLATION);

            wc = WCHAR(((b & 0x1F) << 6) | (b2 & 0x3F));
        }
        else if (b < 0xF0)  // First byte of a three-byte code
        {
            if (cchMultiByte <= 1)   // Do we have two more bytes?
                return FailWith(ERROR_NO_UNICODE_TRANSLATION);

            cchMultiByte -= 2;

            BYTE b2 = *lpMultiByteStr++;
            BYTE b3 = *lpMultiByteStr++;

            if (   (b2 & 0xC0) != 0x80   // Trailing bytes must
                || (b3 & 0xC0) != 0x80   // have the form 10xxxxxx
               )
                return FailWith(ERROR_NO_UNICODE_TRANSLATION);

            wc = WCHAR(((b & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F));
        }
        else
        {
            // 0xF0 - 0xFF: a lead byte for a sequence longer than three bytes
            // (or not a valid lead byte at all). Such a sequence cannot be
            // represented by the single WCHAR this routine produces per
            // sequence, so it is rejected rather than decoded as if it were a
            // three-byte lead.
            return FailWith(ERROR_NO_UNICODE_TRANSLATION);
        }

        if (fStoring)
        {
            if (cchWideChar <= 0)
                return FailWith(ERROR_INSUFFICIENT_BUFFER);

            cchWideChar--;
            *lpWideCharStr++ = wc;
        }
    }

    return cwcNecessary;
}
UINT BuildAKey(const WCHAR *pwcImage, UINT cwcImage, PCHAR pchKeyBuffer, UINT cchKeyBuffer)
{
    // This routine constructs a key from a sequence of Unicode characters.
    // A key consists of a packed-32 length value followed by a UTF-8 representation
    // of the Unicode characters. The resulting key will be stored in the buffer
    // denoted by pchKeyBuffer and cchKeyBuffer. The cchKeyBuffer parameter defines
    // the size of the key buffer in bytes.
    //
    // The result value will always be the number of bytes required to hold the key.
    // So you can dynamically allocate the key buffer by first calling this routine
    // with pchKeyBuffer set to NULL, allocating from the heap, and calling a second
    // time to record the key string.
    //
    // Nothing is ever stored beyond cchKeyBuffer bytes; when the buffer is too
    // small the key is truncated and the result still reports the full size.

    // Counting pass: the number of UTF-8 bytes in the name.
    UINT cbKeyName = WideCharToUTF8(pwcImage, cwcImage, NULL, 0);

    PCHAR pchCursor = pchKeyBuffer;
    UINT  cbSize    = 0;    // Bytes occupied by the packed length prefix

    // Emit the length seven bits at a time, low-order group first. Every byte
    // except the last carries the 0x80 continuation flag, which lies outside
    // the seven payload bits.
    // NOTE(review): group order is kept as it was; confirm it matches the
    // routine that decodes these keys.
    for (UINT c = cbKeyName; ; c >>= 7)
    {
        BOOL fLastByte = c < 0x80;

        cbSize++;

        // cbSize counts the byte about to be stored, so "<=" is the test for
        // "this byte still fits in the buffer".
        if (pchCursor && cbSize <= cchKeyBuffer)
            *pchCursor++ = fLastByte ? CHAR(c) : CHAR((c & 0x7F) | 0x80);

        // The loop must end here whether or not a buffer was supplied;
        // otherwise a NULL-buffer size query would never return.
        if (fLastByte)
            break;
    }

    // Store the name only when at least one byte of room remains after the
    // prefix. This also keeps cchKeyBuffer - cbSize from wrapping around.
    if (pchCursor && cbSize < cchKeyBuffer)
        WideCharToUTF8(pwcImage, cwcImage, pchCursor, cchKeyBuffer - cbSize);

    return cbSize + cbKeyName;
}