windows-nt/Source/XPSP1/NT/inetsrv/iis/iisrearc/ul/drv/parsep.h
2020-09-26 16:20:57 +08:00

675 lines
16 KiB
C

/*++
Copyright (c) 1998-2001 Microsoft Corporation
Module Name:
parsep.h
Abstract:
Contains all of the kernel mode HTTP parsing code.
Author:
Henry Sanders (henrysa) 04-May-1998
Revision History:
--*/
#ifndef _PARSEP_H_
#define _PARSEP_H_
#ifdef __cplusplus
extern "C" {
#endif
//
// External variables.
//
// Table indexed by a byte value; PopChar treats a nonzero entry as
// "this byte is a DBCS lead byte". Defined outside this header.
extern PUSHORT NlsLeadByteInfo;
//
// Constants
//
// Length of the string "HTTP/1.1" without its terminating NUL (8 bytes).
#define MIN_VERSION_SIZE (sizeof("HTTP/1.1") - 1)
// sizeof() includes the terminating NUL, so this is 10 bytes for
// "PROPPATCH". Used as the buffer size in LONG_VERB_ENTRY.
#define MAX_VERB_LENGTH (sizeof("PROPPATCH"))
// The eight bytes of "HTTP/1.1" and "HTTP/1.0" expressed as a 64-bit
// integer in little-endian byte order (low byte 0x48 'H' is first in
// memory, high byte 0x31/0x30 is the minor version digit).
#define HTTP_11_VERSION 0x312e312f50545448
#define HTTP_10_VERSION 0x302e312f50545448
// AND-ing with this clears bit 0x20 in each of eight bytes, which maps
// ASCII lower-case letters onto their upper-case counterparts.
#define UPCASE_MASK ((ULONGLONG)0xdfdfdfdfdfdfdfdf)
// Header names are stored as up to three ULONGLONG words (24 bytes);
// see HEADER_MAP_ENTRY.
#define MAX_HEADER_LONG_COUNT (3)
#define MAX_HEADER_LENGTH (MAX_HEADER_LONG_COUNT * sizeof(ULONGLONG))
// NOTE(review): 26 presumably gives one index slot per first letter of a
// header name, and 8 is the size of the header hint table - confirm
// against the tables defined in the parser source.
#define NUMBER_HEADER_INDICIES (26)
#define NUMBER_HEADER_HINT_INDICIES (8)
//
// Default Server: header if none provided by the application.
//
#define DEFAULT_SERVER_HDR "Microsoft-IIS/6.0"
// Length in bytes of the value above, excluding the terminating NUL.
#define DEFAULT_SERVER_HDR_LENGTH (sizeof(DEFAULT_SERVER_HDR) - sizeof(CHAR))
//
// One second in 100ns system time units. Used for generating
// Date: headers.
//
#define ONE_SECOND 10000000
//
// Size of Connection: header values
//
// Each *_LENGTH is the byte length of the matching string, excluding the
// terminating NUL.
#define CONN_CLOSE_HDR "close"
#define CONN_CLOSE_HDR_LENGTH (sizeof(CONN_CLOSE_HDR) - sizeof(CHAR))
#define CONN_KEEPALIVE_HDR "keep-alive"
#define CONN_KEEPALIVE_HDR_LENGTH (sizeof(CONN_KEEPALIVE_HDR) - sizeof(CHAR))
//
// These are backwards because of little endian.
//
// Each group is: a multi-character constant spelling the text in reverse,
// the number of significant bytes, and a mask to apply before comparing.
// NOTE(review): this relies on the compiler's layout of multi-character
// constants and presumably on the URL bytes being read as a ULONG -
// confirm against CheckForAbsoluteUrl.
//
// "HTTP": the mask clears bit 0x20 in all four bytes (case-insensitive).
#define HTTP_PREFIX 'PTTH'
#define HTTP_PREFIX_SIZE 4
#define HTTP_PREFIX_MASK 0xdfdfdfdf
// "://": only the low three bytes are kept by the mask.
#define HTTP_PREFIX1 '\0//:'
#define HTTP_PREFIX1_SIZE 3
#define HTTP_PREFIX1_MASK 0x00ffffff
// "S://": the mask clears bit 0x20 only in the low byte (the 'S').
#define HTTP_PREFIX2 '//:S'
#define HTTP_PREFIX2_SIZE 4
#define HTTP_PREFIX2_MASK 0xffffffdf
//
// Signature of the callback stored in HEADER_MAP_ENTRY.pServerHandler.
// NOTE(review): presumably called with the request being built, the
// remaining request bytes and the ID of the matched header, and reports
// the bytes it consumed through pBytesTaken - confirm against the
// handlers in the parser source.
//
typedef NTSTATUS (*PFN_SERVER_HEADER_HANDLER)(
PUL_INTERNAL_REQUEST pRequest,
PUCHAR pHttpRequest,
ULONG HttpRequestLength,
HTTP_HEADER_ID HeaderID,
ULONG * pBytesTaken
);
//
// Signature of the callback stored in HEADER_MAP_ENTRY.pClientHandler.
// NOTE(review): presumably the client-side counterpart of
// PFN_SERVER_HEADER_HANDLER, writing into a caller-supplied output buffer
// described by pOutBufferHead / pOutBufferTail / BytesAvailable - confirm
// against the handlers in the parser source.
//
typedef NTSTATUS (*PFN_CLIENT_HEADER_HANDLER)(
PHTTP_KNOWN_HEADER pKnownHeaders,
PUCHAR *pOutBufferHead,
PUCHAR *pOutBufferTail,
PULONG BytesAvailable,
PUCHAR pHeader,
ULONG HeaderLength,
HTTP_HEADER_ID HeaderID,
ULONG * pBytesTaken
);
//
// Structure of the fast verb lookup table. The table consists of a series of
// entries where each entry contains an HTTP verb represented as a ulonglong,
// a mask to use for comparing that verb, the length of the verb and the
// translated id.
//
// Entries are built with CREATE_FAST_VERB_ENTRY; the field order below is
// relied on by that macro's positional initializer.
typedef struct _FAST_VERB_ENTRY
{
// The verb text followed by one space. Char is one byte larger than a
// ULONGLONG so that an 8-character string plus its NUL fits; LongLong
// views the first 8 bytes as a single 64-bit value.
union
{
UCHAR Char[sizeof(ULONGLONG)+1];
ULONGLONG LongLong;
} RawVerb;
// All-ones in the low RawVerbLength bytes, zero above them.
ULONGLONG RawVerbMask;
// Length of the verb plus the separating space.
ULONG RawVerbLength;
// The HttpVerbXxx value this entry maps to.
HTTP_VERB TranslatedVerb;
} FAST_VERB_ENTRY, *PFAST_VERB_ENTRY;
//
// Structure of the all verb lookup table. This table holds all verbs that
// we understand, including those that are too long to fit in the fast
// verb table. Entries are built with CREATE_LONG_VERB_ENTRY.
//
typedef struct _LONG_VERB_ENTRY
{
// Length of the verb text, excluding the terminating NUL.
ULONG RawVerbLength;
// The verb text; MAX_VERB_LENGTH leaves room for "PROPPATCH" plus NUL.
UCHAR RawVerb[MAX_VERB_LENGTH];
// The HttpVerbXxx value this entry maps to.
HTTP_VERB TranslatedVerb;
} LONG_VERB_ENTRY, *PLONG_VERB_ENTRY;
//
// Structure for a header map entry. Each header map entry contains a
// header name and a series of masks to use in checking that name.
// Entries are built with CREATE_HEADER_MAP_ENTRY; the field order below
// is relied on by that macro's positional initializer.
//
typedef struct _HEADER_MAP_ENTRY
{
// Length of the header name text, excluding the terminating NUL.
ULONG HeaderLength;
// Number of 8-byte words needed to hold the name (HeaderLength / 8,
// rounded up).
ULONG ArrayCount;
// ArrayCount * 8: the name length rounded up to whole 8-byte words.
ULONG MinBytesNeeded;
// The header name text, viewable either as bytes or as 64-bit words.
union
{
UCHAR HeaderChar[MAX_HEADER_LENGTH];
ULONGLONG HeaderLong[MAX_HEADER_LONG_COUNT];
} Header;
// One mask per 64-bit word of the name. Statically zero; per the comment
// on CREATE_HEADER_MAP_ENTRY the masks are created by the init code.
ULONGLONG HeaderMask[MAX_HEADER_LONG_COUNT];
// Second copy of the header name text from the same initializer.
// NOTE(review): presumably kept in its original case while Header is
// upper-cased at init - confirm against the init code.
UCHAR MixedCaseHeader[MAX_HEADER_LENGTH];
// Identifier of the known header this entry describes.
HTTP_HEADER_ID HeaderID;
// NOTE(review): presumably marks headers the driver generates itself -
// confirm against the response-building code.
BOOLEAN AutoGenerate;
// Per-header callbacks; see PFN_SERVER_HEADER_HANDLER and
// PFN_CLIENT_HEADER_HANDLER.
PFN_SERVER_HEADER_HANDLER pServerHandler;
PFN_CLIENT_HEADER_HANDLER pClientHandler;
// NOTE(review): presumably this entry's slot in the header hint table
// (NUMBER_HEADER_HINT_INDICIES entries) - confirm the "no hint" value.
LONG HintIndex;
} HEADER_MAP_ENTRY, *PHEADER_MAP_ENTRY;
//
// Structure for a header index table entry.
//
// NOTE(review): presumably pHeaderMap points at the first of Count
// consecutive HEADER_MAP_ENTRY items, matching the
// (pCurrentHeaderMap, HeaderMapCount) pair taken by UlLookupHeader -
// confirm against the table initialization.
typedef struct _HEADER_INDEX_ENTRY
{
PHEADER_MAP_ENTRY pHeaderMap;
ULONG Count;
} HEADER_INDEX_ENTRY, *PHEADER_INDEX_ENTRY;
//
// Structure for a header hint index table entry.
//
// Pairs a header map entry with a single byte.
// NOTE(review): c is presumably the first character of the hinted header
// name, used as a cheap pre-check before UlParseHeaderWithHint - confirm
// against the table initialization.
typedef struct _HEADER_HINT_INDEX_ENTRY
{
PHEADER_MAP_ENTRY pHeaderMap;
UCHAR c;
} HEADER_HINT_INDEX_ENTRY, *PHEADER_HINT_INDEX_ENTRY, **PPHEADER_HINT_INDEX_ENTRY;
//
// A (complex) macro to create a mask for a header map entry,
// given the header length and the mask offset (in bytes). This
// mask will need to be touched up for non-alphabetic characters.
//
// Result, as a 64-bit value:
//   maskoffset <  hlength      : UPCASE_MASK (all eight bytes)
//   maskoffset - hlength <  8  : UPCASE_MASK shifted right by that many
//                                bytes, i.e. only the low bytes remain
//   maskoffset - hlength >= 8  : 0
//
// The arithmetic is consistent with maskoffset being the offset just past
// an 8-byte word of the header (8, 16, 24); confirm against the init code.
// Both arguments are evaluated more than once.
//
#define CREATE_HEADER_MASK(hlength, maskoffset) \
    (((maskoffset) < (hlength)) ? UPCASE_MASK : \
        ((((maskoffset) - (hlength)) < 8) ? \
            (UPCASE_MASK >> (((maskoffset) - (hlength)) * (ULONGLONG)8)) : \
            0))
//
// Macro for creating header map entries. The mask entries are created
// by the init code.
//
// Expands to a positional initializer for HEADER_MAP_ENTRY, in field
// order: HeaderLength (name length without NUL), ArrayCount (length in
// 8-byte words, rounded up), MinBytesNeeded (ArrayCount * 8), Header,
// HeaderMask (all zero here), MixedCaseHeader, then the remaining
// arguments unchanged. `header` is stringized, so pass it unquoted.
//
#define CREATE_HEADER_MAP_ENTRY(header, ID, auto, serverhandler, clienthandler, HintIndex)\
{ \
\
sizeof(#header) - 1, \
((sizeof(#header) - 1) / 8) + \
(((sizeof(#header) - 1) % 8) == 0 ? 0 : 1), \
(((sizeof(#header) - 1) / 8) + \
(((sizeof(#header) - 1) % 8) == 0 ? 0 : 1)) * 8, \
{ #header }, \
{ 0, 0, 0}, \
{ #header }, \
ID, \
auto, \
serverhandler, \
clienthandler, \
HintIndex \
}
//
// Macro for defining fast verb table entries. Note that we don't subtract 1
// from the various sizeof occurrences because we'd just have to add it back
// in to account for the separating space.
//
// Expands to a positional initializer for FAST_VERB_ENTRY: the verb text
// plus a space, a mask with all-ones in the low sizeof(#verb) bytes,
// sizeof(#verb) as the length, and HttpVerb<verb> as the translated id.
// `verb` is stringized and token-pasted, so pass it unquoted.
//
// The verb must be at most 7 characters: sizeof() is unsigned, so for a
// longer verb (8 - sizeof(#verb)) wraps around and the shift count is out
// of range.
//
#define CREATE_FAST_VERB_ENTRY(verb) { {#verb " "}, \
(0xffffffffffffffff >> \
((8 - (sizeof(#verb))) * 8)), \
(sizeof(#verb)), HttpVerb##verb }
//
// Macro for defining all verb table entries.
//
// Expands to a positional initializer for LONG_VERB_ENTRY: the verb
// length without the terminating NUL, the verb text, and HttpVerb<verb>
// as the translated id. `verb` is stringized and token-pasted, so pass it
// unquoted.
//
#define CREATE_LONG_VERB_ENTRY(verb) { sizeof(#verb) - 1, \
#verb,\
HttpVerb##verb }
//
// Evaluates to 1 when the top two bits of ch are 10 (bit pattern
// 10xxxxxx, a UTF-8 trail byte), otherwise to 0.
//
#define IS_UTF8_TRAILBYTE(ch) (0x80 == ((ch) & 0xc0))
//
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably recognizes an absolute URL (see the
// HTTP_PREFIX* constants) at pURL. pHostPtr and BytesTaken are pointers
// annotated IN but look like outputs - confirm against the definition.
//
NTSTATUS
CheckForAbsoluteUrl(
IN PUL_INTERNAL_REQUEST pRequest,
IN PUCHAR pURL,
IN ULONG URLLength,
IN PUCHAR * pHostPtr,
IN ULONG * BytesTaken
);
//
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably matches the HTTP verb at the start of
// pHttpRequest using the verb tables and reports the bytes consumed
// through pBytesTaken - confirm against the definition.
//
NTSTATUS
LookupVerb(
IN PUL_INTERNAL_REQUEST pRequest,
IN PUCHAR pHttpRequest,
IN ULONG HttpRequestLength,
OUT ULONG * pBytesTaken
);
//
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably tries the single header map entry given by
// pHeaderHintMap before any general lookup, reporting the bytes consumed
// through pBytesTaken - confirm against the definition.
//
NTSTATUS
UlParseHeaderWithHint(
IN PUL_INTERNAL_REQUEST pRequest,
IN PUCHAR pHttpRequest,
IN ULONG HttpRequestLength,
IN PHEADER_MAP_ENTRY pHeaderHintMap,
OUT ULONG * pBytesTaken
);
//
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably parses one header from pHttpRequest without a
// hint and reports the bytes consumed through pBytesTaken - confirm
// against the definition.
//
NTSTATUS
UlParseHeader(
IN PUL_INTERNAL_REQUEST pRequest,
IN PUCHAR pHttpRequest,
IN ULONG HttpRequestLength,
OUT ULONG * pBytesTaken
);
//
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably searches the HeaderMapCount entries starting
// at pCurrentHeaderMap for a match with the header at pHttpRequest -
// confirm against the definition.
//
NTSTATUS
UlLookupHeader(
IN PUL_INTERNAL_REQUEST pRequest,
IN PUCHAR pHttpRequest,
IN ULONG HttpRequestLength,
IN PHEADER_MAP_ENTRY pCurrentHeaderMap,
IN ULONG HeaderMapCount,
OUT ULONG * pBytesTaken
);
//
// Identifies which part of a URL is being processed. PopChar only
// distinguishes QueryString from everything else: query string bytes are
// copied through without unescaping or backslash conversion.
//
typedef enum _URL_PART
{
Scheme,
HostName,
AbsPath,
QueryString
} URL_PART;
//
// Selects how PopChar turns URL bytes into Unicode.
//
typedef enum _URL_TYPE
{
// Escaped multi-byte sequences are decoded as UTF-8.
UrlTypeUtf8,
// Each byte is converted on its own with RtlMultiByteToUnicodeN.
UrlTypeAnsi,
// As UrlTypeAnsi, but a byte flagged in NlsLeadByteInfo is combined with
// the following escaped byte into one double-byte character.
UrlTypeDbcs
} URL_TYPE;
//
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably converts SourceLength bytes at pSource into a
// canonical wide-character URL at pDestination, optionally reporting
// where the query string starts and a hash of the URL - confirm against
// the definition, including the units of pBytesCopied.
//
NTSTATUS
UlpCleanAndCopyUrl(
IN URL_PART UrlPart,
IN OUT PWSTR pDestination,
IN PUCHAR pSource,
IN ULONG SourceLength,
OUT PULONG pBytesCopied,
OUT PWSTR * ppQueryString OPTIONAL,
OUT PULONG pUrlHash
);
//
// Declaration only; the implementation is not in this header.
// Same parameter list as UlpCleanAndCopyUrl with a leading UrlType.
// NOTE(review): presumably the worker that UlpCleanAndCopyUrl calls with
// a particular URL_TYPE - confirm against the definition.
//
NTSTATUS
UlpCleanAndCopyUrlByType(
IN URL_TYPE UrlType,
IN URL_PART UrlPart,
IN OUT PWSTR pDestination,
IN PUCHAR pSource,
IN ULONG SourceLength,
OUT PULONG pBytesCopied,
OUT PWSTR * ppQueryString OPTIONAL,
OUT PULONG pUrlHash
);
//
// Declaration only; the implementation is not in this header.
// PopChar calls this with pChar pointing at a "%xx" escape, stores the
// resulting byte through pOutChar, and then advances three bytes.
// NOTE(review): presumably fails when pChar does not start a well-formed
// "%xx" escape - confirm against the definition.
//
NTSTATUS
Unescape(
IN PUCHAR pChar,
OUT PUCHAR pOutChar
);
//
// PopChar is used only if the string is not UTF-8, or UrlPart != QueryString,
// or the current character is '%' or its high bit is set. In all other cases,
// the FastPopChars table is used for fast conversion.
//
// Decodes the single URL character that starts at pChar (possibly a "%xx"
// escape, possibly a multi-byte sequence of escapes) into one WCHAR.
//
// Arguments:
//
//     UrlType      - selects UTF-8 decoding (UrlTypeUtf8) or conversion with
//                    RtlMultiByteToUnicodeN (UrlTypeAnsi / UrlTypeDbcs).
//     UrlPart      - QueryString bytes are passed through without
//                    unescaping; every other part is validated, unescaped
//                    and has '\\' turned into '/'.
//     pChar        - first byte of the character. No length is passed:
//                    up to three "%xx" escapes (9 bytes) may be read
//                    through Unescape.
//                    NOTE(review): the caller presumably guarantees the
//                    buffer is terminated so Unescape cannot run past the
//                    end - confirm.
//     pUnicodeChar - receives the decoded character; written on success only.
//     pCharToSkip  - receives the number of source bytes consumed (1, 3, 6
//                    or 9); written on success only.
//
// Return Value:
//
//     STATUS_SUCCESS, or STATUS_OBJECT_PATH_SYNTAX_BAD for an invalid URL
//     character, a linefeed in the query string, malformed or overlong
//     UTF-8, or a decoded NUL. Failures from Unescape and
//     RtlMultiByteToUnicodeN are returned unchanged.
//
__inline
NTSTATUS
FASTCALL
PopChar(
    IN URL_TYPE UrlType,
    IN URL_PART UrlPart,
    IN PUCHAR pChar,
    OUT WCHAR * pUnicodeChar,
    OUT PULONG pCharToSkip
    )
{
    NTSTATUS Status;
    WCHAR UnicodeChar;
    UCHAR Char;
    UCHAR Trail1;
    UCHAR Trail2;
    ULONG CharToSkip;

    //
    // Sanity check.
    //
    PAGED_CODE();

    //
    // validate it as a valid url character
    //
    if (UrlPart != QueryString)
    {
        if (IS_URL_TOKEN(pChar[0]) == FALSE)
        {
            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
            UlTrace(PARSER, (
                "ul!PopChar(pChar = %p) first char isn't URL token\n",
                pChar
                ));
            goto end;
        }
    }
    else
    {
        //
        // Allow anything but linefeed in the query string.
        //
        if (pChar[0] == LF)
        {
            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
            UlTrace(PARSER, (
                "ul!PopChar(pChar = %p) linefeed in query string\n",
                pChar
                ));
            goto end;
        }

        UnicodeChar = (USHORT) pChar[0];
        CharToSkip = 1;

        // skip all the decoding stuff
        goto slash;
    }

    //
    // need to unescape ?
    //
    // can't decode the query string. that would be lossy decoding
    // as '=' and '&' characters might be encoded, but have meaning
    // to the usermode parser.
    //
    if (pChar[0] == '%')
    {
        Status = Unescape(pChar, &Char);
        if (NT_SUCCESS(Status) == FALSE)
        {
            goto end;
        }
        CharToSkip = 3;
    }
    else
    {
        Char = pChar[0];
        CharToSkip = 1;
    }

    if (UrlType == UrlTypeUtf8)
    {
        //
        // convert to unicode, checking for utf8 .
        //
        // 3 byte runs are the largest we can have. 16 bits in UCS-2 =
        // 3 bytes of (4+4,2+6,2+6) where it's code + char.
        // for a total of 6+6+4 char bits = 16 bits.
        //

        //
        // NOTE: we'll only bother to decode utf if it was escaped
        // thus the (CharToSkip == 3)
        //
        if ((CharToSkip == 3) && ((Char & 0xf0) == 0xe0))
        {
            // 3 byte run
            //
            // Unescape the next 2 trail bytes
            //
            Status = Unescape(pChar+CharToSkip, &Trail1);
            if (NT_SUCCESS(Status) == FALSE)
            {
                goto end;
            }
            CharToSkip += 3; // %xx

            Status = Unescape(pChar+CharToSkip, &Trail2);
            if (NT_SUCCESS(Status) == FALSE)
            {
                goto end;
            }
            CharToSkip += 3; // %xx

            if (IS_UTF8_TRAILBYTE(Trail1) == FALSE ||
                IS_UTF8_TRAILBYTE(Trail2) == FALSE)
            {
                // bad utf!
                //
                Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
                UlTrace(PARSER, (
                    "ul!PopChar( 0x%x 0x%x ) bad trail bytes\n",
                    Trail1,
                    Trail2
                    ));
                goto end;
            }

            // handle three byte case
            // 1110xxxx 10xxxxxx 10xxxxxx
            UnicodeChar = (USHORT) (((Char & 0x0f) << 12) |
                                    ((Trail1 & 0x3f) << 6) |
                                    (Trail2 & 0x3f));

            //
            // Reject overlong forms: anything below U+0800 has a shorter
            // encoding. Accepting them would let e.g. %E0%80%AF stand in
            // for '/' and slip past checks made on the raw bytes.
            //
            if (UnicodeChar < 0x0800)
            {
                Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
                UlTrace(PARSER, (
                    "ul!PopChar( 0x%x 0x%x 0x%x ) overlong 3 byte utf8\n",
                    Char,
                    Trail1,
                    Trail2
                    ));
                goto end;
            }
        }
        else if ((CharToSkip == 3) && ((Char & 0xe0) == 0xc0))
        {
            // 2 byte run
            //
            // Unescape the next 1 trail byte
            //
            Status = Unescape(pChar+CharToSkip, &Trail1);
            if (NT_SUCCESS(Status) == FALSE)
            {
                goto end;
            }
            CharToSkip += 3; // %xx

            if (IS_UTF8_TRAILBYTE(Trail1) == FALSE)
            {
                // bad utf!
                //
                Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
                UlTrace(PARSER, (
                    "ul!PopChar( 0x%x ) bad trail byte\n",
                    Trail1
                    ));
                goto end;
            }

            // handle two byte case
            // 110xxxxx 10xxxxxx
            UnicodeChar = (USHORT) (((Char & 0x1f) << 6) |
                                    (Trail1 & 0x3f));

            //
            // Reject overlong forms: anything below U+0080 is 7-bit ascii
            // and must be sent as a single byte (lead bytes 0xc0 and 0xc1
            // are never valid). This stops %C0%AF decoding to '/' and
            // %C1%9C decoding to '\\'.
            //
            if (UnicodeChar < 0x0080)
            {
                Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
                UlTrace(PARSER, (
                    "ul!PopChar( 0x%x 0x%x ) overlong 2 byte utf8\n",
                    Char,
                    Trail1
                    ));
                goto end;
            }
        }
        // now this can either be unescaped high-bit (bad)
        // or escaped high-bit. (also bad)
        //
        // thus not checking CharToSkip
        //
        else if ((Char & 0x80) == 0x80)
        {
            // high bit set ! bad utf!
            //
            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
            UlTrace(PARSER, (
                "ul!PopChar( 0x%x ) ERROR: high bit set! bad utf!\n",
                Char
                ));
            goto end;
        }
        //
        // Normal character (again either escaped or unescaped)
        //
        else
        {
            //
            // Simple conversion to unicode, it's 7-bit ascii.
            //
            UnicodeChar = (USHORT)Char;
        }
    }
    else // UrlType != UrlTypeUtf8
    {
        UCHAR AnsiChar[2];
        ULONG AnsiCharSize;

        //
        // Convert ANSI character to Unicode.
        // If the UrlType is UrlTypeDbcs, then we may have
        // a DBCS lead/trail pair.
        //
        if (UrlType == UrlTypeDbcs && NlsLeadByteInfo[Char])
        {
            //
            // This is a double-byte character. The trail byte is always
            // read through Unescape, i.e. it must be a %xx escape.
            //
            AnsiCharSize = 2;
            AnsiChar[0] = Char;

            Status = Unescape(pChar+CharToSkip, &AnsiChar[1]);
            if (!NT_SUCCESS(Status))
            {
                goto end;
            }
            CharToSkip += 3; // %xx
        }
        else
        {
            //
            // This is a single-byte character.
            //
            AnsiCharSize = 1;
            AnsiChar[0] = Char;
        }

        Status = RtlMultiByteToUnicodeN(
                        &UnicodeChar,
                        sizeof(WCHAR),
                        NULL,
                        (PCHAR) &AnsiChar[0],
                        AnsiCharSize
                        );
        if (!NT_SUCCESS(Status))
        {
            goto end;
        }
    }

slash:
    //
    // turn backslashes into forward slashes
    //
    if (UrlPart != QueryString && UnicodeChar == L'\\')
    {
        UnicodeChar = L'/';
    }
    else if (UnicodeChar == UNICODE_NULL)
    {
        //
        // we pop'd a NULL. bad!
        //
        Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
        goto end;
    }

    //
    // Outputs are only written once decoding has fully succeeded.
    //
    *pCharToSkip = CharToSkip;
    *pUnicodeChar = UnicodeChar;
    Status = STATUS_SUCCESS;

end:
    return Status;
} // PopChar
// Call this only after the entire request has been parsed
//
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably produces the canonical URL for pRequest from
// the raw URL captured during parsing - confirm against the definition.
//
NTSTATUS
UlpCookUrl(
IN PUL_INTERNAL_REQUEST pRequest
);
//
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably parses an "HTTP/x.y" version from pString into
// pVersion and returns the number of bytes consumed (0 on failure) -
// confirm against the definition.
//
ULONG
UlpParseHttpVersion(
PUCHAR pString,
ULONG StringLength,
PHTTP_VERSION pVersion
);
#ifdef __cplusplus
}; // extern "C"
#endif
#endif // _PARSEP_H_