windows-nt/Source/XPSP1/NT/inetsrv/query/qutil/querylib/cgiesc.cxx
2020-09-26 16:20:57 +08:00

323 lines
8.9 KiB
C++

//+---------------------------------------------------------------------------
//
// Microsoft Windows
// Copyright (C) Microsoft Corporation, 1996 - 1998.
//
// File: cgiesc.cxx
//
// Contents: WEB CGI escape & unescape classes
//
// History: 96/Jan/3 DwightKr Created
//
//----------------------------------------------------------------------------
#include <pch.cxx>
#pragma hdrstop
#include <cgiesc.hxx>
//+---------------------------------------------------------------------------
//
// Function: DecodeURLEscapes - Decode URL escapes
//
// Synopsis: Removes the escape characters from a string, converting to
// Unicode along the way.
//
// Arguments: [pIn] - string to convert
// [l] - length of string in chars, updated on return
// [pOut] - converted string
// [ulCodePage] - code page for translation
//
//----------------------------------------------------------------------------
void DecodeURLEscapes( BYTE * pIn, ULONG & l, WCHAR * pOut, ULONG ulCodePage )
{
WCHAR * p2 = pOut;
WCHAR c1;
WCHAR c2;
XArray<BYTE> xDeferBuf;
BYTE * pDefer = 0;
ULONG l2 = l;
for( ; l2; l2-- )
{
BOOL fSaveAsUnicode = FALSE;
// Convert ASCII to corresponding character
// If Latin-1 character, save for MB translation, accumulate char
// If Unicode escape, flush accumulated chars and save converted char
c1 = *pIn;
//
// Spaces are escaped by converting them into plus signs.
// Convert them back.
//
if ( c1 == '+' )
{
c1 = ' ';
pIn++;
}
else if (c1 == '%')
{
//
// Special characters are converted to values of the format %XY
// where XY is the HEX code for the ASCII character.
//
// A percent sign is transmitted as %%.
//
if (*(pIn+1) == '%')
{
c1 = '%';
pIn += 2;
l2--;
}
else if (l2 >= 3)
{
pIn++;
c1 = (WCHAR) toupper(*pIn);
c2 = (WCHAR) toupper(*(pIn+1));
if ( c1 == 'U' && l2 >= 6 )
{
// Unicode escape, %uxxxx
c1 = c2;
c2 = (WCHAR) toupper(*(pIn+2));
WCHAR c3 = (WCHAR) toupper(*(pIn+3));
WCHAR c4 = (WCHAR) toupper(*(pIn+4));
if ( isxdigit( c1 ) && isxdigit( c2 ) &&
isxdigit( c3 ) && isxdigit( c4 ) )
{
c1 = ((c1 >= 'A') ? (c1-'A')+10 : c1-'0') << 12;
c1 += ((c2 >= 'A') ? (c2-'A')+10 : c2-'0') << 8;
c1 += ((c3 >= 'A') ? (c3-'A')+10 : c3-'0') << 4;
c1 += ((c4 >= 'A') ? (c4-'A')+10 : c4-'0');
if ( pDefer )
{
unsigned cchDefer = CiPtrToUint( pDefer - xDeferBuf.GetPointer() );
cchDefer = MultiByteToWideChar( ulCodePage,
0,
(char *) xDeferBuf.GetPointer(),
cchDefer,
p2,
cchDefer );
Win4Assert( cchDefer != 0 );
pDefer = 0;
p2 += cchDefer;
}
pIn += 5;
l2 -= 5;
fSaveAsUnicode = TRUE;
}
else
{
c1 = '%';
}
}
else if ( isxdigit( c1 ) && isxdigit( c2 ) )
{
c1 = ( ((c1 >= 'A') ? (c1-'A')+10 : c1-'0')*16 +
((c2 >= 'A') ? (c2-'A')+10 : c2-'0') );
pIn += 2;
l2 -= 2;
if ( c1 >= 0x80 && 0 == pDefer )
{
// The character needs to be deferred for MBCS
// translation.
if (xDeferBuf.GetPointer() == 0)
{
xDeferBuf.Init( l2+1 );
}
pDefer = xDeferBuf.GetPointer();
}
}
else
c1 = '%';
}
else
{
pIn++;
if ( c1 >= 0x80 && 0 == pDefer )
{
// The character needs to be deferred for MBCS
// translation.
if (xDeferBuf.GetPointer() == 0)
{
xDeferBuf.Init( l2+1 );
}
pDefer = xDeferBuf.GetPointer();
}
}
}
else
{
pIn++;
}
if (! fSaveAsUnicode)
{
if ( c1 >= 0x80 && 0 == pDefer )
{
// The character needs to be deferred for MBCS
// translation.
if (xDeferBuf.GetPointer() == 0)
{
xDeferBuf.Init( l2+1 );
}
pDefer = xDeferBuf.GetPointer();
}
}
else
{
Win4Assert( pDefer == 0 );
}
if (pDefer)
{
Win4Assert( c1 < 0x100 );
*pDefer++ = (BYTE) c1;
}
else
{
*p2++ = c1;
}
}
if ( pDefer )
{
unsigned cchDefer = CiPtrToUint( pDefer - xDeferBuf.GetPointer() );
cchDefer = MultiByteToWideChar( ulCodePage,
0,
(char *) xDeferBuf.GetPointer(),
cchDefer,
p2,
cchDefer );
Win4Assert( cchDefer != 0 );
pDefer = 0;
p2 += cchDefer;
}
*p2 = 0;
l = CiPtrToUlong( p2 - pOut );
}
//+---------------------------------------------------------------------------
//
//  Function:   DecodeEscapes - decode escapes in place
//
//  Synopsis:   Calls the three-argument overload with [p] as both the
//              input and the output string.
//
//  Arguments:  [p] - string to decode; also receives the result
//              [l] - passed by reference to the three-argument overload
//
//----------------------------------------------------------------------------

void DecodeEscapes( WCHAR * p, ULONG & l )
{
    WCHAR * const pwszInPlace = p;

    DecodeEscapes( pwszInPlace, l, pwszInPlace );
}
//+---------------------------------------------------------------------------
//
//  Function:   WideHexDigitValue - numeric value of a wide hex digit
//
//  Synopsis:   Classifies a wide character as a hexadecimal digit using
//              explicit range checks, so any WCHAR value is safe to pass.
//
//  Arguments:  [c] - character to classify
//
//  Returns:    0-15 for '0'-'9', 'A'-'F', 'a'-'f'; -1 for anything else
//
//----------------------------------------------------------------------------

static inline int WideHexDigitValue( WCHAR c )
{
    if ( c >= L'0' && c <= L'9' )
        return c - L'0';

    if ( c >= L'A' && c <= L'F' )
        return ( c - L'A' ) + 10;

    if ( c >= L'a' && c <= L'f' )
        return ( c - L'a' ) + 10;

    return -1;
}

//+---------------------------------------------------------------------------
//
//  Function:   DecodeEscapes - Decode URL escapes in a Unicode string
//
//  Synopsis:   Removes the escape characters from a string.
//
//              '+'   becomes a space
//              %%    becomes '%'
//              %XY   becomes the character with hex value XY
//              A '%' that does not start one of the above is copied as is.
//
//  Arguments:  [pIn]  - string to convert
//              [l]    - length of string in chars, updated on return to the
//                       number of chars written (excluding the terminator)
//              [pOut] - converted string, null terminated.  At most l + 1
//                       WCHARs are written.  May be the same pointer as
//                       [pIn]: the output never gets ahead of the input.
//
//  Notes:      Never reads beyond the first [l] chars of [pIn] when looking
//              ahead for an escape sequence, and does not hand wide
//              characters to the narrow isxdigit classifier.
//
//----------------------------------------------------------------------------

void DecodeEscapes( WCHAR * pIn, ULONG & l, WCHAR * pOut )
{
    WCHAR * p2 = pOut;
    ULONG l2 = l;           // input chars left, including the current one

    for ( ; l2; l2-- )
    {
        if ( *pIn == L'+' )
        {
            //
            // Spaces are escaped by converting them into plus signs.
            // Convert them back.
            //
            *p2++ = L' ';
            pIn++;
        }
        else if ( *pIn != L'%' )
        {
            *p2++ = *pIn++;
        }
        else if ( l2 >= 2 && pIn[1] == L'%' )
        {
            //
            // A percent sign is transmitted as %%.  The length check keeps
            // the lookahead inside the counted input; without it a trailing
            // '%' followed by a stray '%' beyond the end would drive l2
            // below zero and the loop would run off both buffers.
            //
            *p2++ = L'%';
            pIn += 2;
            l2--;
        }
        else if ( l2 >= 3 )
        {
            //
            // Special characters are converted to values of the format %XY
            // where XY is the HEX code for the character.  Both digits are
            // read before anything is written so in-place decoding is safe.
            //
            int const iHigh = WideHexDigitValue( pIn[1] );
            int const iLow  = WideHexDigitValue( pIn[2] );

            pIn++;

            if ( iHigh >= 0 && iLow >= 0 )
            {
                *p2++ = (WCHAR) ( iHigh * 16 + iLow );
                pIn += 2;
                l2 -= 2;
            }
            else
            {
                // Not an escape: keep the '%' and rescan what follows it.
                *p2++ = L'%';
            }
        }
        else
        {
            // Too few characters left for an escape; keep the '%'.
            *p2++ = *pIn++;
        }
    }

    *p2 = 0;
    l = CiPtrToUlong( p2 - pOut );
}
//+---------------------------------------------------------------------------
//
//  Function:   DecodeHtmlNumeric - decode HTML numeric entity
//
//  Synopsis:   Looks for sequences like "&#12345;" and converts in-place
//              to a single unicode character.
//
//  Arguments:  [pIn] - null terminated string to convert
//
//  Notes:      Only well-formed decimal references are converted: "&#",
//              one or more decimal digits, then ';', with a value in the
//              range 1 - 0xFFFF.  Anything else (no digits, missing ';',
//              hex form "&#x..;", value 0 or too large) is copied through
//              unchanged rather than being dropped or turned into an
//              embedded null that would truncate the string.
//
//              The result is never longer than the input, so the
//              conversion is safe to do in place.
//
//----------------------------------------------------------------------------

void DecodeHtmlNumeric( WCHAR * pIn )
{
    if ( 0 == pIn )
        return;

    // Nothing before the first '&' can change, so start there.
    pIn = wcschr( pIn, L'&' );

    if ( 0 == pIn )
        return;

    WCHAR * p2 = pIn;       // write cursor; never passes the read cursor

    while ( *pIn )
    {
        if ( pIn[0] == L'&' && pIn[1] == L'#' )
        {
            WCHAR * pDigit = pIn + 2;
            BOOL fHasDigit = ( *pDigit >= L'0' && *pDigit <= L'9' );
            ULONG ulValue = 0;

            while ( *pDigit >= L'0' && *pDigit <= L'9' )
            {
                // Stop accumulating once out of range; the value then stays
                // above 0xFFFF and cannot wrap back into range.
                if ( ulValue <= 0xFFFF )
                    ulValue = ulValue * 10 + ( *pDigit - L'0' );

                pDigit++;
            }

            if ( fHasDigit &&
                 *pDigit == L';' &&
                 ulValue != 0 &&
                 ulValue <= 0xFFFF )
            {
                *p2++ = (WCHAR) ulValue;
                pIn = pDigit + 1;
                continue;
            }

            // Malformed reference: fall through and copy the '&' literally.
            // The rest is rescanned on later iterations.
        }

        *p2++ = *pIn++;
    }

    *p2 = 0;
}