1436 lines
37 KiB
C++
1436 lines
37 KiB
C++
/**********************************************************************/
|
||
/** Microsoft Windows NT **/
|
||
/** Copyright(c) Microsoft Corp., 1994 **/
|
||
/**********************************************************************/
|
||
|
||
/*
|
||
string.cxx
|
||
|
||
This module contains a light weight string class
|
||
|
||
|
||
FILE HISTORY:
|
||
Johnl 15-Aug-1994 Created
|
||
MuraliK 27-Feb-1995 Modified to be a standalone module with buffer.
|
||
MuraliK 2-June-1995 Made into separate library
|
||
|
||
*/
|
||
|
||
#include "precomp.hxx"
|
||
|
||
|
||
//
|
||
// Normal includes only for this module to be active
|
||
//
|
||
|
||
# include <opt_time.h>
|
||
|
||
extern "C" {
|
||
# include <nt.h>
|
||
# include <ntrtl.h>
|
||
# include <nturtl.h>
|
||
# include <windows.h>
|
||
};
|
||
|
||
# include "dbgutil.h"
|
||
# include <string.hxx>
|
||
# include <auxctrs.h>
|
||
|
||
# include <tchar.h>
|
||
# include <mbstring.h>
|
||
|
||
//
|
||
// String globals
|
||
//
|
||
|
||
typedef UCHAR * ( __cdecl * PFNSTRCASE ) ( UCHAR * );
|
||
typedef INT ( __cdecl * PFNSTRNICMP ) ( const UCHAR *, const UCHAR *, size_t );
|
||
typedef INT ( __cdecl * PFNSTRICMP ) ( const UCHAR *, const UCHAR * );
|
||
typedef size_t ( __cdecl * PFNSTRLEN ) ( const UCHAR * );
|
||
typedef UCHAR * (__cdecl * PFNSTRRCHR) (const UCHAR *, UINT);
|
||
|
||
PFNSTRCASE g_pfnStrupr = _mbsupr;
|
||
PFNSTRCASE g_pfnStrlwr = _mbslwr;
|
||
PFNSTRNICMP g_pfnStrnicmp = _mbsnicmp;
|
||
PFNSTRICMP g_pfnStricmp = _mbsicmp;
|
||
PFNSTRLEN g_pfnStrlen = _mbslen;
|
||
PFNSTRRCHR g_pfnStrrchr = _mbsrchr;
|
||
|
||
BOOL g_fFavorDBCS = FALSE;
|
||
|
||
#define UTF8_HACK_KEY "System\\CurrentControlSet\\Services\\InetInfo\\Parameters"
|
||
#define UTF8_HACK_VALUE "FavorDBCS"
|
||
|
||
//
|
||
// Private Definitions
|
||
//
|
||
|
||
//
|
||
// When appending data, this is the extra amount we request to avoid
|
||
// reallocations
|
||
//
|
||
#define STR_SLOP        128

//
//  Maps a value in the range 0..15 to its upper-case hex digit character.
//
#define HEXDIGIT( nDigit )                              \
    (TCHAR)((nDigit) <= 9 ?                             \
              (nDigit) + '0'                            \
            : (nDigit) - 10 + 'A')

//
//  Maps a single hex digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
//  numeric value.  The argument is evaluated more than once, and no
//  validation is done - callers check isxdigit() first.
//
#define TOHEX( ch )                                     \
    ((ch) <= '9' ?                                      \
        (ch) - '0' :                                    \
        ((ch) >= 'a' ?                                  \
            (ch) - 'a' + 10 :                           \
            (ch) - 'A' + 10))
|
||
|
||
|
||
/*******************************************************************
|
||
|
||
NAME: STR::STR
|
||
|
||
SYNOPSIS: Construct a string object
|
||
|
||
ENTRY: Optional object initializer
|
||
|
||
NOTES: If the object is not valid (i.e. !IsValid()) then GetLastError
|
||
should be called.
|
||
|
||
The object is guaranteed to construct successfully if nothing
|
||
or NULL is passed as the initializer.
|
||
|
||
********************************************************************/
|
||
|
||
// Inlined in string.hxx
|
||
|
||
|
||
VOID
STR::AuxInit( const BYTE * pInit )
/*++
    Sets the initial contents of the string.

    Arguments:
        pInit - null terminated string to copy, or NULL to make this
                object the empty string.

    Returns:
        Nothing.  If the buffer cannot be resized to hold the copy, the
        underlying BUFFER is marked invalid.
--*/
{
    //
    //  No initializer: just store a terminator in the existing buffer.
    //

    if ( pInit == NULL )
    {
        *((CHAR *) QueryPtr()) = '\0';
        m_cchLen = 0;
        return;
    }

    //
    //  Size of the initializer in bytes, terminating null included.
    //

    INT cbInit = (::strlen( (const CHAR * ) pInit ) + 1) * sizeof(CHAR);

    if ( !Resize( cbInit ) )
    {
        BUFFER::SetValid( FALSE);
        return;
    }

    CopyMemory( QueryPtr(), pInit, cbInit );
    m_cchLen = (cbInit)/sizeof(CHAR) - 1;       // length excludes the null

    return;
} // STR::AuxInit()
|
||
|
||
|
||
|
||
/*******************************************************************
|
||
|
||
NAME: STR::AuxAppend
|
||
|
||
SYNOPSIS: Appends the string onto this one.
|
||
|
||
ENTRY: Object to append
|
||
********************************************************************/
|
||
|
||
BOOL STR::AuxAppend( const BYTE * pStr, UINT cbStr, BOOL fAddSlop )
/*++
    Appends cbStr bytes from pStr to the end of this string and null
    terminates the result.

    Arguments:
        pStr     - bytes to append (must not be NULL).
        cbStr    - number of bytes to append; no terminator is expected
                   in pStr.
        fAddSlop - when the buffer has to grow, TRUE requests STR_SLOP
                   extra bytes, FALSE requests only room for the null.

    Returns:
        TRUE on success, FALSE if the buffer could not be resized.
--*/
{
    DBG_ASSERT( pStr != NULL );

    UINT cbCurrent = QueryCB();

    AcIncrement( CacStringAppend);

    //
    //  QuerySize() is the size of the buffer, not the length of the
    //  string held in it.  Grow only if the appended text plus its
    //  terminator does not fit in what we already have.
    //

    if ( QuerySize() < cbCurrent + cbStr + sizeof(CHAR) )
    {
        if ( !Resize( cbCurrent + cbStr + (fAddSlop ? STR_SLOP : sizeof(CHAR) )) )
        {
            return FALSE;
        }
    }

    //
    //  Copy exactly cbStr bytes after the current contents, then write
    //  the terminator ourselves.
    //

    BYTE * pbTail = (BYTE *) QueryPtr() + cbCurrent;

    memcpy( pbTail, pStr, cbStr );

    m_cchLen += cbStr/sizeof(CHAR);
    ((CHAR *) QueryPtr())[m_cchLen] = '\0';

    return TRUE;
} // STR::AuxAppend()
|
||
|
||
|
||
#if 0
// STR::SetLen() is inlined now - this copy is compiled out.
BOOL
STR::SetLen( IN DWORD cchLen)
/*++
    Truncates the string stored in this buffer to the given length.

    Arguments:
        cchLen - new length in characters; must be smaller than the
                 buffer size so that the terminator fits.

    Returns:
        TRUE if the string was truncated, FALSE if the buffer is too
        small for the requested length.
--*/
{
    if ( cchLen < QuerySize()) {

        // terminate at the requested position and record the new length
        *((CHAR *) QueryPtr() + cchLen) = '\0';
        m_cchLen = cchLen;

        return ( TRUE);
    }

    // the buffer itself cannot hold a string of this length
    return ( FALSE);
} // STR::SetLen()

#endif // 0
|
||
|
||
|
||
/*******************************************************************
|
||
|
||
NAME: STR::LoadString
|
||
|
||
SYNOPSIS: Loads a string resource from this module's string table
|
||
or from the system string table
|
||
|
||
ENTRY: dwResID - System error or module string ID
|
||
lpszModuleName - name of the module from which to load.
|
||
If NULL, then load the string from system table.
|
||
|
||
********************************************************************/
|
||
|
||
BOOL STR::LoadString( IN DWORD   dwResID,
                      IN LPCTSTR lpszModuleName, // Optional
                      IN DWORD   dwLangID        // Optional
                      )
/*++
    Replaces the contents of this string with a message or string
    resource.

    Arguments:
        dwResID        - system message ID or module string ID.
        lpszModuleName - module whose string table is used.  If NULL the
                         text comes from the system message table.
        dwLangID       - language ID handed to FormatMessageA; only used
                         when lpszModuleName is NULL.

    Returns:
        TRUE if the text was found and copied, FALSE otherwise.
--*/
{
    //
    //  A module name was given: read from that module's string table.
    //

    if ( lpszModuleName != NULL) {

        CHAR achRes[STR_MAX_RES_SIZE];

        INT cchRes = ::LoadStringA( GetModuleHandle( lpszModuleName),
                                    dwResID,
                                    (CHAR *) achRes,
                                    sizeof(achRes));

        if ( !cchRes ) {
            return ( FALSE);
        }

        return ( Copy( (LPSTR) achRes, cchRes ));
    }

    //
    //  No module name: ask the system message table.  The ANSI entry
    //  point is called directly so that no Unicode conversion is needed.
    //

    BYTE * pchMsg = NULL;

    INT cchMsg = ::FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER |
                                   FORMAT_MESSAGE_IGNORE_INSERTS  |
                                   FORMAT_MESSAGE_MAX_WIDTH_MASK  |
                                   FORMAT_MESSAGE_FROM_SYSTEM,
                                   NULL,
                                   dwResID,
                                   dwLangID,
                                   (LPSTR) &pchMsg,
                                   1024,
                                   NULL );

    if ( !cchMsg ) {
        return ( FALSE);
    }

    BOOL fCopied = Copy( (LPCSTR) pchMsg, cchMsg );

    //
    //  Free the buffer FormatMessage allocated
    //

    ::LocalFree( (VOID*) pchMsg );

    return ( fCopied);

} // STR::LoadString()
|
||
|
||
|
||
|
||
|
||
BOOL STR::LoadString( IN DWORD   dwResID,
                      IN HMODULE hModule
                      )
/*++
    Replaces the contents of this string with a string resource taken
    from an already loaded module.

    Arguments:
        dwResID - ID of the string resource.
        hModule - handle of the module holding the resource (not NULL).

    Returns:
        TRUE if the resource was found and copied, FALSE otherwise.
--*/
{
    DBG_ASSERT( hModule != NULL );

    CHAR achRes[STR_MAX_RES_SIZE];

    INT cchRes = ::LoadStringA( hModule,
                                dwResID,
                                (CHAR *) achRes,
                                sizeof(achRes));

    // a zero count means nothing was loaded
    if ( !cchRes ) {
        return ( FALSE);
    }

    return ( Copy( (LPSTR) achRes, cchRes ));

} // STR::LoadString()
|
||
|
||
|
||
|
||
BOOL
STR::FormatString(
    IN DWORD   dwResID,
    IN LPCTSTR apszInsertParams[],
    IN LPCTSTR lpszModuleName,
    IN DWORD   cbMaxMsg
    )
/*++
    Formats a message from a module's message table, substituting the
    supplied insert strings, and stores the result in this string.

    Arguments:
        dwResID          - message ID.
        apszInsertParams - array of insert strings, passed to
                           FormatMessageA as an argument array.
        lpszModuleName   - name of the module holding the message table.
        cbMaxMsg         - size hint; cbMaxMsg * sizeof(WCHAR) is passed as
                           the nSize argument of FormatMessageA.

    Returns:
        TRUE if the message was formatted and copied, FALSE otherwise.
--*/
{
    LPSTR pszMessage;
    BOOL  fCopied = FALSE;

    const DWORD dwFlags = FORMAT_MESSAGE_ALLOCATE_BUFFER |
                          FORMAT_MESSAGE_ARGUMENT_ARRAY  |
                          FORMAT_MESSAGE_FROM_HMODULE;

    DWORD cchMessage = ::FormatMessageA( dwFlags,
                                         GetModuleHandle( lpszModuleName ),
                                         dwResID,
                                         0,
                                         (LPSTR) &pszMessage,
                                         cbMaxMsg * sizeof(WCHAR),
                                         (va_list *) apszInsertParams );

    if ( cchMessage != 0 )
    {
        fCopied = Copy( (LPCSTR) pszMessage, cchMessage );

        // the message buffer was allocated by FormatMessageA
        ::LocalFree( (VOID*) pszMessage );
    }

    /* INTRINSA suppress = uninitialized */
    return fCopied;
}
|
||
|
||
|
||
|
||
/*******************************************************************
|
||
|
||
NAME: STR::Escape
|
||
|
||
SYNOPSIS: Replaces non-ASCII characters with their hex equivalent
|
||
|
||
NOTES:
|
||
|
||
HISTORY:
|
||
Johnl 17-Aug-1994 Created
|
||
|
||
********************************************************************/
|
||
|
||
//
//  Returns TRUE if the byte has to be written as a %XX escape: bytes up
//  to and including space, bytes 128..159, and the characters % ? + & #.
//  CR and LF are never escaped.  The byte is taken as unsigned so that
//  the 128..159 range can actually match (with a signed CHAR it never
//  does).
//
static
BOOL
StrNeedsEscape( UCHAR ch )
{
    if ( ch == '\n' || ch == '\r' )
    {
        return FALSE;
    }

    return ( ch <= 32 )                 ||
           ( ch >= 128 && ch <= 159 )   ||
           ( ch == '%' ) || ( ch == '?' ) || ( ch == '+' ) ||
           ( ch == '&' ) || ( ch == '#' );
}

BOOL STR::Escape( VOID )
/*++
    Replaces, in place, every byte selected by StrNeedsEscape() with a
    three character "%XX" sequence (upper-case hex, high nibble first).

    The number of escapes is counted first so that the buffer is resized
    at most once, and the string is then expanded from the end towards
    the front.  If the resize fails the string is left untouched.

    Returns:
        TRUE on success, FALSE if the buffer could not be grown.
--*/
{
    CHAR * pch = QueryStr();

    DBG_ASSERT( pch );

    //
    //  Pass 1: count the bytes that need escaping.
    //

    size_t cchOld   = ::strlen( pch );
    size_t cEscapes = 0;

    for ( size_t iScan = 0; iScan < cchOld; iScan++ )
    {
        if ( StrNeedsEscape( (UCHAR) pch[iScan] ) )
        {
            cEscapes++;
        }
    }

    if ( cEscapes != 0 )
    {
        //
        //  Each escape adds two characters.  Grow only when needed.
        //

        size_t cchNew   = cchOld + 2 * cEscapes;
        size_t cbNeeded = (cchNew + 1) * sizeof(CHAR);

        if ( QuerySize() < cbNeeded )
        {
            if ( !Resize( (UINT) cbNeeded ) )
            {
                return FALSE;
            }

            //
            //  Resize can change the base pointer
            //

            pch = QueryStr();
        }

        //
        //  Pass 2: expand back to front.  The write index never drops
        //  below the read index, so no unread byte is overwritten.
        //

        size_t iSrc = cchOld;
        size_t iDst = cchNew;

        pch[iDst] = '\0';

        while ( iSrc > 0 )
        {
            UCHAR ch = (UCHAR) pch[--iSrc];

            if ( StrNeedsEscape( ch ) )
            {
                iDst -= 3;
                pch[iDst]     = '%';
                pch[iDst + 1] = HEXDIGIT( ch >> 4 );
                pch[iDst + 2] = HEXDIGIT( ch & 0x0F );
            }
            else
            {
                pch[--iDst] = (CHAR) ch;
            }
        }
    }

    m_cchLen = ::strlen( QueryStr());  // to be safe recalc the new length
    return TRUE;
} // STR::Escape()
|
||
|
||
|
||
/*******************************************************************
|
||
|
||
NAME: STR::EscapeSpaces
|
||
|
||
SYNOPSIS: Replaces all spaces with their hex equivalent
|
||
|
||
NOTES:
|
||
|
||
HISTORY:
|
||
Johnl 17-Aug-1994 Created
|
||
|
||
********************************************************************/
|
||
|
||
BOOL STR::EscapeSpaces( VOID )
/*++
    Replaces, in place, every space in the string with "%20".

    Whether anything was replaced is tracked with an explicit flag.  The
    index of the last space cannot be used for that: a string whose only
    space is its first character would leave the index at zero and the
    cached length would not be refreshed.

    Returns:
        TRUE on success, FALSE if the buffer could not be grown.  On
        failure the spaces replaced so far stay replaced and the cached
        length is brought up to date.
--*/
{
    CHAR * pch       = QueryStr();
    CHAR * pchSpace;
    int    i         = 0;
    BOOL   fChanged  = FALSE;
    BOOL   fRet      = TRUE;

    DBG_ASSERT( pch );

    while ( (pchSpace = strchr( pch + i, ' ' )) != NULL )
    {
        i = DIFF( pchSpace - QueryStr() );

        if ( !Resize( QuerySize() + 2 * sizeof(CHAR) ))
        {
            fRet = FALSE;
            break;
        }

        //
        //  Resize can change the base pointer
        //

        pch = QueryStr();

        //
        //  Open a two character gap after the space (memory can overlap),
        //  then overwrite space + gap with the escape sequence.
        //

        ::memmove( &pch[i+3],
                   &pch[i+1],
                   (::strlen( &pch[i+1] ) + 1) * sizeof(CHAR));

        pch[i]   = '%';
        pch[i+1] = '2';
        pch[i+2] = '0';

        fChanged = TRUE;

        //
        //  Continue the search after the text just written.
        //

        i += 3;
    }

    if ( fChanged )
    {
        m_cchLen = ::strlen( QueryStr());  // to be safe recalc the new length
    }

    return fRet;

} // STR::EscapeSpaces()
|
||
|
||
|
||
|
||
/*******************************************************************
|
||
|
||
NAME: STR::Unescape
|
||
|
||
SYNOPSIS: Replaces hex escapes with the Latin-1 equivalent
|
||
|
||
NOTES: Operates in place on the CHAR string; %uXXXX escapes are converted to the ANSI code page.
|
||
|
||
HISTORY:
|
||
Johnl 17-Aug-1994 Created
|
||
|
||
********************************************************************/
|
||
|
||
BOOL STR::Unescape( VOID )
/*++
    Decodes URL escapes in place.

      %uXXXX  (four hex digits) - the 16 bit value is converted to the
                                  ANSI code page with WideCharToMultiByte.
      %XX     (two hex digits)  - replaced by the single byte value.
      any other '%'             - kept as it is.

    pScan walks the original text and pDest is where decoded text is
    written.  Until the first escape has been decoded (fChanged is FALSE)
    nothing is moved and pDest has to track pScan exactly.

    Returns:
        TRUE always.
--*/
{
    CHAR    *pScan;
    CHAR    *pDest;
    CHAR    *pNextScan;
    wchar_t  wch;
    DWORD    dwLen;
    BOOL     fChanged = FALSE;

    pDest = pScan = strchr( QueryStr(), '%');

    while (pScan)
    {
        if ( (pScan[1] == 'u' || pScan[1] == 'U') &&
             ::isxdigit( (UCHAR)pScan[2] ) &&
             ::isxdigit( (UCHAR)pScan[3] ) &&
             ::isxdigit( (UCHAR)pScan[4] ) &&
             ::isxdigit( (UCHAR)pScan[5] ) )
        {
            wch  = TOHEX(pScan[2]) * 4096 + TOHEX(pScan[3]) * 256;
            wch += TOHEX(pScan[4]) * 16   + TOHEX(pScan[5]);

            //
            //  The converted character takes at most two bytes, and the
            //  escape it replaces took six, so writing at pDest is safe.
            //

            dwLen = WideCharToMultiByte( CP_ACP,
                                         0,
                                         &wch,
                                         1,
                                         (LPSTR) pDest,
                                         2,
                                         NULL,
                                         NULL );

            pDest += dwLen;
            pScan += 6;
            fChanged = TRUE;
        }
        else if ( ::isxdigit( (UCHAR)pScan[1] ) && // WinSE 4944
                  ::isxdigit( (UCHAR)pScan[2] ))
        {
            *pDest = TOHEX(pScan[1]) * 16 + TOHEX(pScan[2]);

            pDest ++;
            pScan += 3;
            fChanged = TRUE;
        }
        else   // Not an escaped char, just a '%'
        {
            if (fChanged)
            {
                *pDest = *pScan;
            }

            pDest++;
            pScan++;
        }

        //
        // Copy all the information between this and the next escaped char
        //
        pNextScan = strchr( pScan, '%');

        if (fChanged)   // pScan!=pDest, so we have to copy the char's
        {
            if (!pNextScan)     // That was the last '%' in the string
            {
                ::memmove( pDest,
                           pScan,
                           (::strlen( pScan ) + 1) * sizeof(CHAR));  // +1 to copy '\0'
            }
            else
            {
                // There is another '%'; copy whatever lies before it
                dwLen = DIFF(pNextScan - pScan);

                if (dwLen)
                {
                    ::memmove( pDest,
                               pScan,
                               dwLen * sizeof(CHAR));
                    pDest += dwLen;
                }
            }
        }
        else if (pNextScan)
        {
            //
            //  Nothing has been decoded yet, so the text is still in
            //  place.  Keep pDest level with pScan; otherwise the text
            //  between this literal '%' and the next '%' would be
            //  overwritten by a later escape.
            //
            pDest = pNextScan;
        }

        pScan = pNextScan;
    }

    if ( fChanged )
    {
        m_cchLen = ::strlen( QueryStr());  // for safety recalc the length
    }

    return TRUE;
}
|
||
|
||
|
||
|
||
BOOL
STR::CopyToBuffer( WCHAR * lpszBuffer,  LPDWORD lpcch) const
/*++
    Description:
        Converts the string to Unicode and stores it, with its terminating
        null, in the WCHAR buffer passed in.

    Arguments:
        lpszBuffer      pointer to WCHAR buffer which on return contains
                        the UNICODE version of string on success.
        lpcch           pointer to DWORD containing the size of the buffer
                        in characters.
                        If *lpcch == 0 the function returns TRUE with a
                        sufficient character count (terminating null
                        included) stored in *lpcch; lpszBuffer is not
                        touched in that case.
                        On success *lpcch receives the number of
                        characters written, terminating null included.
                        If the buffer is too small, *lpcch receives a
                        sufficient character count.

    Returns:
        TRUE on success.
        FALSE on failure.  Use GetLastError() for further details.

    Notes:
        Any buffer large enough for the converted text is accepted.  An
        exact-length match must not be demanded: for DBCS text the number
        of wide characters produced is smaller than the count the size
        query reports.

    History:
        MuraliK         11-30-94
--*/
{
    if ( lpcch == NULL) {
        SetLastError( ERROR_INVALID_PARAMETER);
        return ( FALSE);
    }

    if ( *lpcch == 0) {

        //
        //  Inquiring the size of buffer alone
        //
        *lpcch = QueryCCH() + 1;    // add one character for terminating null
        return ( TRUE);
    }

    if ( lpszBuffer == NULL) {
        SetLastError( ERROR_INVALID_PARAMETER);
        return ( FALSE);
    }

    //
    //  Copy after conversion from ANSI to Unicode
    //
    int cchWritten = MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED,
                                          QueryStrA(), QueryCCH() + 1,
                                          lpszBuffer, (int )*lpcch);

    if ( cchWritten == 0) {

        //
        //  Error in conversion.  Tell the caller how much room to
        //  provide if the buffer was simply too small.
        //
        if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
            *lpcch = QueryCCH() + 1;
        }

        return ( FALSE);
    }

    *lpcch = (DWORD ) cchWritten;

    return ( TRUE);
} // STR::CopyToBuffer()
|
||
|
||
|
||
BOOL
STR::CopyToBuffer( CHAR * lpszBuffer, LPDWORD lpcch) const
/*++
    Description:
        Copies the string, with its terminating null, into the CHAR
        buffer passed in, provided the buffer is large enough.

    Arguments:
        lpszBuffer      pointer to CHAR buffer which receives the string.
                        May be NULL, in which case only the required size
                        is reported and the call fails.
        lpcch           pointer to DWORD containing the size of the buffer
                        in characters.  On return it always holds the
                        number of characters required, terminating null
                        included.

    Returns:
        TRUE if the string was copied.
        FALSE otherwise; the last error is ERROR_INVALID_PARAMETER when
        lpcch is NULL and ERROR_INSUFFICIENT_BUFFER when the buffer is
        missing or too small.

    History:
        MuraliK         20-Nov-1996
--*/
{
    if ( lpcch == NULL) {
        SetLastError( ERROR_INVALID_PARAMETER);
        return ( FALSE);
    }

    DWORD cchNeeded = QueryCCH() + 1;   // string plus terminating null

    BOOL fFits = ( NULL != lpszBuffer) && ( *lpcch >= cchNeeded);

    if ( fFits) {

        DBG_ASSERT( lpszBuffer);
        CopyMemory( lpszBuffer, QueryStrA(), cchNeeded);

    } else {

        SetLastError( ERROR_INSUFFICIENT_BUFFER);
    }

    // report the required size in every case
    *lpcch = cchNeeded;

    return ( fFits);
} // STR::CopyToBuffer()
|
||
|
||
BOOL
STR::SafeCopy( const CHAR  * pchInit )
/*++
    Replaces the contents of this string with pchInit.  If the copy
    fails the previous first byte and length are put back.

    Arguments:
        pchInit - null terminated string to copy; NULL leaves the string
                  empty.

    Returns:
        TRUE on success (or when pchInit is NULL), otherwise the failure
        result of AuxAppend().
--*/
{
    DWORD cchSaved = 0;
    char  chSaved  = '\0';

    //
    //  Empty the string, remembering just enough to undo that: the
    //  first byte (about to be overwritten by the terminator) and the
    //  length.
    //

    if ( QueryPtr() ) {
        chSaved        = *(QueryStr());
        cchSaved       = m_cchLen;
        *(QueryStr())  = '\0';
        m_cchLen       = 0;
    }

    if ( pchInit == NULL ) {
        return TRUE;
    }

    BOOL fAppended = AuxAppend( (const BYTE *) pchInit, ::strlen( pchInit ), FALSE );

    if ( !fAppended && QueryPtr() ) {

        // the append failed - restore what was there before
        *(QueryStr()) = chSaved;
        m_cchLen      = cchSaved;
    }

    return fAppended;
}
|
||
|
||
|
||
/*******************************************************************
|
||
|
||
NAME: ::CollapseWhite
|
||
|
||
SYNOPSIS: Collapses white space starting at the passed pointer.
|
||
|
||
RETURNS: Returns a pointer to the next chunk of white space or the
|
||
end of the string.
|
||
|
||
NOTES: This is a Unicode only method
|
||
|
||
HISTORY:
|
||
Johnl 24-Aug-1994 Created
|
||
|
||
********************************************************************/
|
||
|
||
WCHAR * CollapseWhite( WCHAR * pch )
/*++
    Removes the run of white space that starts at pch by shifting the
    remainder of the string (terminating null included) down over it,
    then skips the non-white text that follows.

    Arguments:
        pch - position in a null terminated string at which white space
              is to be removed.

    Returns:
        Pointer to the next white space character after pch, or to the
        terminating null if there is none.
--*/
{
    LPWSTR pchStart = pch;

    while ( ISWHITE( *pch ) )
        pch++;

    if ( pch != pchStart )
    {
        //
        //  Move everything from the first non-white character through
        //  the terminating null.  The length is that of the remaining
        //  string, in bytes - not the number of characters skipped.
        //

        LPWSTR pchEnd = pch;

        while ( *pchEnd )
            pchEnd++;

        ::memmove( pchStart,
                   pch,
                   (size_t)(pchEnd - pch + 1) * sizeof(WCHAR) );

        //
        //  The text now begins where the white space used to be.
        //

        pch = pchStart;
    }

    while ( *pch && !ISWHITE( *pch ))
        pch++;

    return pch;
} // CollapseWhite()
|
||
|
||
|
||
|
||
|
||
|
||
//
|
||
// Private constants.
|
||
//
|
||
|
||
//
//  Action codes stored in p_ActionTable and dispatched on by CanonURL().
//
#define ACTION_NOTHING              0x00000000      // emit nothing
#define ACTION_EMIT_CH              0x00010000      // emit the character
#define ACTION_EMIT_DOT_CH          0x00020000      // emit "." + character
#define ACTION_EMIT_DOT_DOT_CH      0x00030000      // emit ".." + character
#define ACTION_BACKUP               0x00040000      // drop previous element
#define ACTION_MASK                 0xFFFF0000
|
||
|
||
|
||
//
|
||
// Private globals.
|
||
//
|
||
|
||
//
//  Next-state table used by CanonURL().  One row per current state, one
//  column per character class.  State 4 is the final state.
//
INT p_StateTable[16] =
{
//  other   dot     EOS     slash
    0,      0,      4,      1,          // state 0
    0,      2,      4,      1,          // state 1 (reached on a slash)
    0,      3,      4,      1,          // state 2 (slash, dot)
    0,      0,      4,      1           // state 3 (slash, dot, dot)
};
|
||
|
||
|
||
|
||
//
//  Action table used by CanonURL().  Laid out like p_StateTable: one row
//  per current state, with the columns other, dot, EOS, slash.
//
INT p_ActionTable[16] =
{
    // state 0
    ACTION_EMIT_CH,             ACTION_EMIT_CH,
    ACTION_EMIT_CH,             ACTION_EMIT_CH,

    // state 1
    ACTION_EMIT_CH,             ACTION_NOTHING,
    ACTION_EMIT_CH,             ACTION_NOTHING,

    // state 2
    ACTION_EMIT_DOT_CH,         ACTION_NOTHING,
    ACTION_EMIT_CH,             ACTION_NOTHING,

    // state 3
    ACTION_EMIT_DOT_DOT_CH,     ACTION_EMIT_DOT_DOT_CH,
    ACTION_BACKUP,              ACTION_BACKUP
};
|
||
|
||
// Each state owns four consecutive table entries (one per character
// class), so the first entry of a state is found by multiplying by 4.
# define IndexFromState( st)   ( (st) * 4)
|
||
|
||
|
||
// the following table provides the index for various ISA Latin1 characters
|
||
// in the incoming URL.
|
||
// It assumes that the URL is ISO Latin1 == ASCII
|
||
INT p_rgIndexForChar[] = {
|
||
|
||
2, // null char
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1 thru 10
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 11 thru 20
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 21 thru 30
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 31 thru 40
|
||
0, 0, 0, 0, 0, 1, 3, 0, 0, 0, // 41 thru 50 46 = '.' 47 = '/'
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 51 thru 60
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 61 thru 70
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 71 thru 80
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 81 thru 90
|
||
0, 3, 0, 0, 0, 0, 0, 0, 0, 0, // 91 thru 100 92 = '\\'
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 101 thru 110
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 111 thru 120
|
||
0, 0, 0, 0, 0, 0, 0, 0 // 121 thru 128
|
||
};
|
||
|
||
// TRUE when the top two bits of the byte are "10", i.e. the byte has the
// form 10xxxxxx used for UTF-8 trail bytes.
#define IS_UTF8_TRAILBYTE(ch)      (((ch) & 0xc0) == 0x80)
|
||
|
||
|
||
/*******************************************************************
|
||
|
||
NAME: IsUTF8URL
|
||
|
||
ENTRY: pszPath - The path to sanitize.
|
||
|
||
HISTORY:
|
||
atsusk 06-Jan-1998 Created.
|
||
|
||
********************************************************************/
|
||
|
||
BOOL IsUTF8URL(CHAR * pszPath)
/*++
    Decides whether the path should be handled as UTF-8.

    When g_fFavorDBCS is set, the path counts as UTF-8 only if it cannot
    be converted from the ANSI code page (MB_ERR_INVALID_CHARS).

    Otherwise every byte with the top bit set must start a well formed
    two or three byte UTF-8 sequence, and the character it encodes must
    convert to one or two ANSI bytes without the default character being
    substituted.

    Arguments:
        pszPath - null terminated path to examine.

    Returns:
        TRUE if the path is to be treated as UTF-8, FALSE otherwise.
--*/
{
    if ( g_fFavorDBCS )
    {
        int cchWide = MultiByteToWideChar( CP_ACP,
                                           MB_ERR_INVALID_CHARS,
                                           pszPath,
                                           -1,
                                           NULL,
                                           0);

        return ( cchWide == 0 );
    }

    for ( CHAR chLead = *pszPath++; chLead != 0; chLead = *pszPath++ )
    {
        // plain 7 bit characters need no checking
        if ( !(chLead & 0x80) )
        {
            continue;
        }

        //
        //  Look at the next byte, and at the one after it only if the
        //  string has not already ended.
        //

        char    chTrail1 = *pszPath++;
        char    chTrail2 = chTrail1 ? *pszPath : 0;
        wchar_t wch;

        if ( ((chLead & 0xF0) == 0xE0) &&
             IS_UTF8_TRAILBYTE(chTrail1) &&
             IS_UTF8_TRAILBYTE(chTrail2) )
        {
            // handle three byte case
            // 1110xxxx 10xxxxxx 10xxxxxx
            wch = (wchar_t) (((chLead   & 0x0f) << 12) |
                             ((chTrail1 & 0x3f) << 6)  |
                              (chTrail2 & 0x3f));
            pszPath++;
        }
        else if ( ((chLead & 0xE0) == 0xC0) &&
                  IS_UTF8_TRAILBYTE(chTrail1) )
        {
            // handle two byte case
            // 110xxxxx 10xxxxxx
            wch = (wchar_t) (((chLead & 0x1f) << 6) | (chTrail1 & 0x3f));
        }
        else
        {
            return FALSE;
        }

        //
        //  The character must be representable in the ANSI code page.
        //

        BOOL fUsedDefault = FALSE;

        int cbAnsi = WideCharToMultiByte( CP_ACP,
                                          0,
                                          &wch,
                                          1,
                                          NULL,
                                          0,
                                          NULL,
                                          &fUsedDefault );

        if ( fUsedDefault == TRUE || cbAnsi == 0 || cbAnsi > 2 )
        {
            return FALSE;
        }
    }

    return TRUE;
}   // IsUTF8URL()
|
||
|
||
|
||
/*******************************************************************
|
||
|
||
NAME: CanonURL
|
||
|
||
SYNOPSIS: Sanitizes a path by removing bogus path elements.
|
||
|
||
As expected, "/./" entries are simply removed, and
|
||
"/../" entries are removed along with the previous
|
||
path element.
|
||
|
||
To maintain compatibility with URL path semantics
|
||
additional transformations are required. All backward
|
||
slashes "\\" are converted to forward slashes. Any
|
||
repeated forward slashes (such as "///") are mapped to
|
||
single backslashes.
|
||
|
||
A state table (see the p_StateTable global at the
|
||
beginning of this file) is used to perform most of
|
||
the transformations. The table's rows are indexed
|
||
by current state, and the columns are indexed by
|
||
the current character's "class" (either slash, dot,
|
||
NULL, or other). Each entry in the table consists
|
||
of the new state tagged with an action to perform.
|
||
See the ACTION_* constants for the valid action
|
||
codes.
|
||
|
||
ENTRY: pszPath - The path to sanitize.
|
||
fIsDBCSLocale - Indicates the server is in a
|
||
locale that uses DBCS.
|
||
|
||
HISTORY:
|
||
KeithMo 07-Sep-1994 Created.
|
||
MuraliK 28-Apr-1995 Adopted this for symbolic paths
|
||
|
||
********************************************************************/
|
||
INT
CanonURL(
    CHAR * pszPath,
    BOOL   fIsDBCSLocale
    )
/*++
    Canonicalizes a path in place, driven by p_StateTable and
    p_ActionTable: each character is classified (other, dot, end of
    string, slash) and the action for the current state and class is
    carried out.

    If IsUTF8URL() accepts the path, two and three byte UTF-8 sequences
    are converted to the ANSI code page while scanning, and DBCS lead
    byte handling is switched off.

    Arguments:
        pszPath       - null terminated path; rewritten in place.
        fIsDBCSLocale - TRUE if the server locale uses DBCS.

    Returns:
        Length of the resulting string, i.e. the offset of its
        terminating null from pszPath.
--*/
{
    UCHAR * pszSrc;
    UCHAR * pszDest;
    DWORD   ch;
    INT     index;
    BOOL    fDBCS = FALSE;
    DWORD   cchMultiByte = 0;

    //
    //  Result of the most recent UTF-8 to ANSI conversion.  The second
    //  byte is emitted on the loop iteration *after* the one that did
    //  the conversion, so this must live at function scope.  It is
    //  initialized so that it never holds indeterminate data.
    //
    UCHAR   mbstr[2] = { 0, 0 };

    DBG_ASSERT( pszPath != NULL );

    //
    // Always look for UTF8 except when DBCS characters are detected
    //
    BOOL    fScanForUTF8 = IsUTF8URL(pszPath);

    // If fScanForUTF8 is true, this URL is UTF8. don't recognize DBCS.
    if (fIsDBCSLocale && fScanForUTF8) {
        fIsDBCSLocale = FALSE;
    }

    //
    //  Start our scan at the first character
    //

    pszSrc = pszDest = (UCHAR *) pszPath;

    //
    //  State 0 is the initial state.
    //
    index = 0; // State = 0

    //
    //  Loop until we enter state 4 (the final, accepting state).
    //

    do {

        //
        //  Move to the row of the next state, grab the next character
        //  and add its class to select the table entry.
        //

        index = IndexFromState( p_StateTable[index]); // 4 = # states
        ch = (DWORD ) *pszSrc++;

        if ( !fIsDBCSLocale )
        {
            index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);
        }
        else
        {
            if ( fDBCS )
            {
                //
                // If this is a 0 terminator, we need to set next
                // state accordingly
                //

                if ( ch == 0 )
                {
                    index += p_rgIndexForChar[ ch ];
                }

                //
                // fDBCS == TRUE means this byte was a trail byte.
                // index is implicitly set to zero.
                //
                fDBCS = FALSE;
            }
            else
            {
                index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);

                if ( IsDBCSLeadByte( (UCHAR)ch ) )
                {
                    //
                    // This is a lead byte, so the next is a trail.
                    //
                    fDBCS = TRUE;
                }
            }
        }

        //
        //  Interesting UTF8 characters always have the top bit set
        //

        if ( (ch & 0x80) && fScanForUTF8 )
        {
            wchar_t wch;

            //
            //  This is a UTF8 character, convert it here.
            //  index is implicitly set to zero.
            //
            if ( cchMultiByte < 2 )
            {
                char chTrail1;
                char chTrail2;

                chTrail1 = *pszSrc;
                if (chTrail1) {
                    chTrail2 = *(pszSrc+1);
                } else {
                    chTrail2 = 0;
                }
                wch = 0;

                if ((ch & 0xf0) == 0xe0)
                {
                    // handle three byte case
                    // 1110xxxx 10xxxxxx 10xxxxxx

                    wch = (wchar_t) (((ch & 0x0f) << 12) |
                                     ((chTrail1 & 0x3f) << 6) |
                                     (chTrail2 & 0x3f));

                    cchMultiByte = WideCharToMultiByte( CP_ACP,
                                                        0,
                                                        &wch,
                                                        1,
                                                        (LPSTR) mbstr,
                                                        2,
                                                        NULL,
                                                        NULL );

                    //
                    //  Emit the first converted byte now.  If there is a
                    //  second one, leave one source byte unread so that
                    //  the next iteration comes back here to emit it.
                    //
                    ch = mbstr[0];
                    pszSrc += (3 - cchMultiByte);

                    // WinSE 12843: Security Fix, Index should be updated for this character
                    index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);

                } else if ((ch & 0xe0) == 0xc0)
                {
                    // handle two byte case
                    // 110xxxxx 10xxxxxx

                    wch = (wchar_t) (((ch & 0x1f) << 6) | (chTrail1 & 0x3f));

                    cchMultiByte = WideCharToMultiByte( CP_ACP,
                                                        0,
                                                        &wch,
                                                        1,
                                                        (LPSTR) mbstr,
                                                        2,
                                                        NULL,
                                                        NULL );

                    ch = mbstr[0];
                    pszSrc += (2 - cchMultiByte);

                    // WinSE 12843: Security Fix, Index should be updated for this character
                    index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);
                }

            } else {
                //
                // get ready to emit 2nd byte of converted character
                //
                ch = mbstr[1];
                cchMultiByte = 0;
            }
        }


        //
        //  Perform the action associated with the state.
        //

        switch( p_ActionTable[index] )
        {
        case ACTION_EMIT_DOT_DOT_CH :
            *pszDest++ = '.';
            /* fall through */

        case ACTION_EMIT_DOT_CH :
            *pszDest++ = '.';
            /* fall through */

        case ACTION_EMIT_CH :
            *pszDest++ = (CHAR ) ch;
            /* fall through */

        case ACTION_NOTHING :
            break;

        case ACTION_BACKUP :
            //
            //  Drop the previous path element: cut the output at the
            //  slash just emitted and continue after the slash before it.
            //
            if( (pszDest > ( (UCHAR *) pszPath + 1 ) ) && (*pszPath == '/'))
            {
                pszDest--;
                DBG_ASSERT( *pszDest == '/' );

                *pszDest = '\0';
                pszDest = (UCHAR *) strrchr( pszPath, '/') + 1;
            }

            *pszDest = '\0';
            break;

        default :
            DBG_ASSERT( !"Invalid action code in state table!" );
            index = IndexFromState(0) + 2;    // move to invalid state
            DBG_ASSERT( p_StateTable[index] == 4);
            *pszDest++ = '\0';
            break;
        }

    } while( p_StateTable[index] != 4 );

    //
    // point to terminating nul
    //
    if (p_ActionTable[index] == ACTION_EMIT_CH) {
        pszDest--;
    }

    DBG_ASSERT(*pszDest == '\0' && pszDest > (UCHAR*) pszPath);

    return DIFF(pszDest - (UCHAR*)pszPath);
}   // CanonURL()
|
||
|
||
|
||
|
||
DWORD
InitializeStringFunctions(
    VOID
)
/*++
    Initializes the string function pointers depending on the system code
    page.  If the code page has no multi-byte characters the pointers are
    switched to the regular single byte functions; otherwise they keep
    pointing at the more expensive multi-byte functions.

    Also reads the FavorDBCS value from the InetInfo parameters key into
    g_fFavorDBCS.  A missing key or value leaves the flag unchanged.

  Arguments:
    None

  Returns:
    0 if successful, else Win32 Error

--*/
{
    //
    //  Single byte code page: use the plain CRT routines.
    //

    CPINFO cpInfo;

    if ( GetCPInfo( CP_ACP, &cpInfo ) && cpInfo.MaxCharSize == 1 )
    {
        g_pfnStrupr   = (PFNSTRCASE)  _strupr;
        g_pfnStrlwr   = (PFNSTRCASE)  _strlwr;
        g_pfnStricmp  = (PFNSTRICMP)  _stricmp;
        g_pfnStrnicmp = (PFNSTRNICMP) _strnicmp;
        g_pfnStrlen   = (PFNSTRLEN)   strlen;
        g_pfnStrrchr  = (PFNSTRRCHR)  strrchr;
    }

    //
    //  Do we need to hack for Korean?
    //

    HKEY hkeyParams;

    if ( RegOpenKeyEx( HKEY_LOCAL_MACHINE,
                       UTF8_HACK_KEY,
                       0,
                       KEY_READ,
                       &hkeyParams ) == ERROR_SUCCESS )
    {
        DWORD dwFavor = 0;
        DWORD cbFavor = sizeof( dwFavor );

        if ( RegQueryValueEx( hkeyParams,
                              UTF8_HACK_VALUE,
                              NULL,
                              NULL,
                              (LPBYTE) &dwFavor,
                              &cbFavor ) == ERROR_SUCCESS )
        {
            g_fFavorDBCS = !!dwFavor;
        }

        DBG_REQUIRE( RegCloseKey( hkeyParams ) == ERROR_SUCCESS );
    }

    return ERROR_SUCCESS;
}
|
||
|
||
UCHAR *
IISstrupr(
    UCHAR *             pszString
)
/*++
    Upper-cases a string through the routine g_pfnStrupr currently
    points at.

  Arguments:
    pszString - String to uppercase

  Returns:
    Whatever the underlying routine returns (pointer to the string)

--*/
{
    DBG_ASSERT( g_pfnStrupr != NULL );

    UCHAR * pszResult = (*g_pfnStrupr)( pszString );

    return pszResult;
}
|
||
|
||
UCHAR *
IISstrlwr(
    UCHAR *             pszString
)
/*++
    Lower-cases a string through the routine g_pfnStrlwr currently
    points at.

  Arguments:
    pszString - String to lowercase

  Returns:
    Whatever the underlying routine returns (pointer to the string)

--*/
{
    DBG_ASSERT( g_pfnStrlwr != NULL );

    UCHAR * pszResult = (*g_pfnStrlwr)( pszString );

    return pszResult;
}
|
||
|
||
size_t
|
||
IISstrlen(
|
||
UCHAR * pszString
|
||
)
|
||
/*++
|
||
Wrapper for strlen() call.
|
||
|
||
Arguments:
|
||
pszString - String to check
|
||
|
||
Returns:
|
||
Length of string
|
||
|
||
--*/
|
||
{
|
||
DBG_ASSERT( g_pfnStrlen != NULL );
|
||
|
||
return g_pfnStrlen( pszString );
|
||
}
|
||
|
||
INT
IISstrnicmp(
    UCHAR *             pszString1,
    UCHAR *             pszString2,
    size_t              size
)
/*++
    Compares two strings, ignoring case and looking at no more than
    `size' characters, through the routine g_pfnStrnicmp currently
    points at.

  Arguments:
    pszString1 - String1
    pszString2 - String2
    size - # characters to compare upto

  Returns:
    Result of the underlying comparison routine (0 if equal)

--*/
{
    DBG_ASSERT( g_pfnStrnicmp != NULL );

    INT nResult = (*g_pfnStrnicmp)( pszString1, pszString2, size );

    return nResult;
}
|
||
|
||
|
||
INT
IISstricmp(
    UCHAR *             pszString1,
    UCHAR *             pszString2
)
/*++
    Compares two strings, ignoring case, through the routine
    g_pfnStricmp currently points at.

  Arguments:
    pszString1 - String1
    pszString2 - String2

  Returns:
    Result of the underlying comparison routine (0 if equal)

--*/
{
    DBG_ASSERT( g_pfnStricmp != NULL );

    INT nResult = (*g_pfnStricmp)( pszString1, pszString2 );

    return nResult;
}
|
||
|
||
|
||
// Like strncpy, but doesn't pad the rest of the buffer with zeroes, which
// is expensive when `source' is short and `count' is large.
//
// Copies at most `count' characters from `source' to `dest'.  As with
// strncpy, `dest' is NOT null terminated when `source' has `count' or
// more characters.  When `source' is shorter, its terminator is copied
// and, if there is still room, one further zero is stored after it.
// Nothing is ever written beyond dest[count - 1].
//
// Returns `dest'.
char *
IISstrncpy(
    char * dest,
    const char * source,
    size_t count)
{
    char *start = dest;

    while (count && (*dest++ = *source++))    /* copy string */
        count--;

    //
    // If the loop stopped on the terminator, that terminator used one of
    // the `count' slots still counted here and `dest' is already past
    // it.  The extra zero fits only if at least one more slot is left.
    //
    if (count > 1)                            /* append one zero */
        *dest = '\0';

    return(start);
}
|
||
|
||
UCHAR *
IISstrrchr(
    const UCHAR *       pszString,
    UINT                c
)
/*++
    Finds the last occurrence of a character through the routine
    g_pfnStrrchr currently points at.

  Arguments:
    pszString - String
    c         - Character to find.

  Returns:
    pointer to the char or NULL.

--*/
{
    DBG_ASSERT( g_pfnStrrchr != NULL );

    UCHAR * pchFound = (*g_pfnStrrchr)( pszString, c );

    return pchFound;
}
|
||
|