/*++ Copyright (c) 1998-2001 Microsoft Corporation Module Name: parsep.h Abstract: Contains all of the kernel mode HTTP parsing code. Author: Henry Sanders (henrysa) 04-May-1998 Revision History: --*/ #ifndef _PARSEP_H_ #define _PARSEP_H_ #ifdef __cplusplus extern "C" { #endif // // External variables. // extern PUSHORT NlsLeadByteInfo; // // Constants // #define MIN_VERSION_SIZE (sizeof("HTTP/1.1") - 1) #define MAX_VERB_LENGTH (sizeof("PROPPATCH")) #define HTTP_11_VERSION 0x312e312f50545448 #define HTTP_10_VERSION 0x302e312f50545448 #define UPCASE_MASK ((ULONGLONG)0xdfdfdfdfdfdfdfdf) #define MAX_HEADER_LONG_COUNT (3) #define MAX_HEADER_LENGTH (MAX_HEADER_LONG_COUNT * sizeof(ULONGLONG)) #define NUMBER_HEADER_INDICIES (26) #define NUMBER_HEADER_HINT_INDICIES (8) // // Default Server: header if none provided by the application. // #define DEFAULT_SERVER_HDR "Microsoft-IIS/6.0" #define DEFAULT_SERVER_HDR_LENGTH (sizeof(DEFAULT_SERVER_HDR) - sizeof(CHAR)) // // One second in 100ns system time units. Used for generating // Date: headers. // #define ONE_SECOND 10000000 // // Size of Connection: header values // #define CONN_CLOSE_HDR "close" #define CONN_CLOSE_HDR_LENGTH (sizeof(CONN_CLOSE_HDR) - sizeof(CHAR)) #define CONN_KEEPALIVE_HDR "keep-alive" #define CONN_KEEPALIVE_HDR_LENGTH (sizeof(CONN_KEEPALIVE_HDR) - sizeof(CHAR)) // // These are backwards because of little endian. // #define HTTP_PREFIX 'PTTH' #define HTTP_PREFIX_SIZE 4 #define HTTP_PREFIX_MASK 0xdfdfdfdf #define HTTP_PREFIX1 '\0//:' #define HTTP_PREFIX1_SIZE 3 #define HTTP_PREFIX1_MASK 0x00ffffff #define HTTP_PREFIX2 '//:S' #define HTTP_PREFIX2_SIZE 4 #define HTTP_PREFIX2_MASK 0xffffffdf typedef NTSTATUS (*PFN_SERVER_HEADER_HANDLER)( PUL_INTERNAL_REQUEST pRequest, PUCHAR pHttpRequest, ULONG HttpRequestLength, HTTP_HEADER_ID HeaderID, ULONG * pBytesTaken ); typedef NTSTATUS (*PFN_CLIENT_HEADER_HANDLER)( PHTTP_KNOWN_HEADER pKnownHeaders, PUCHAR *pOutBufferHead, PUCHAR *pOutBufferTail, PULONG BytesAvailable, PUCHAR pHeader, ULONG HeaderLength, HTTP_HEADER_ID HeaderID, ULONG * pBytesTaken ); // // Structure of the fast verb lookup table. The table consists of a series of // entries where each entry contains an HTTP verb represented as a ulonglong, // a mask to use for comparing that verb, the length of the verb and the // translated id. // typedef struct _FAST_VERB_ENTRY { union { UCHAR Char[sizeof(ULONGLONG)+1]; ULONGLONG LongLong; } RawVerb; ULONGLONG RawVerbMask; ULONG RawVerbLength; HTTP_VERB TranslatedVerb; } FAST_VERB_ENTRY, *PFAST_VERB_ENTRY; // // Stucture of the all verb lookup table. This table holds all verbs that // we understand, including those that are too long to fit in the fast // verb table. // typedef struct _LONG_VERB_ENTRY { ULONG RawVerbLength; UCHAR RawVerb[MAX_VERB_LENGTH]; HTTP_VERB TranslatedVerb; } LONG_VERB_ENTRY, *PLONG_VERB_ENTRY; // // Structure for a header map entry. Each header map entry contains a // verb and a series of masks to use in checking that verb. // typedef struct _HEADER_MAP_ENTRY { ULONG HeaderLength; ULONG ArrayCount; ULONG MinBytesNeeded; union { UCHAR HeaderChar[MAX_HEADER_LENGTH]; ULONGLONG HeaderLong[MAX_HEADER_LONG_COUNT]; } Header; ULONGLONG HeaderMask[MAX_HEADER_LONG_COUNT]; UCHAR MixedCaseHeader[MAX_HEADER_LENGTH]; HTTP_HEADER_ID HeaderID; BOOLEAN AutoGenerate; PFN_SERVER_HEADER_HANDLER pServerHandler; PFN_CLIENT_HEADER_HANDLER pClientHandler; LONG HintIndex; } HEADER_MAP_ENTRY, *PHEADER_MAP_ENTRY; // // Structure for a header index table entry. // typedef struct _HEADER_INDEX_ENTRY { PHEADER_MAP_ENTRY pHeaderMap; ULONG Count; } HEADER_INDEX_ENTRY, *PHEADER_INDEX_ENTRY; // // Structure for a header hint index table entry. // typedef struct _HEADER_HINT_INDEX_ENTRY { PHEADER_MAP_ENTRY pHeaderMap; UCHAR c; } HEADER_HINT_INDEX_ENTRY, *PHEADER_HINT_INDEX_ENTRY, **PPHEADER_HINT_INDEX_ENTRY; // // A (complex) macro to create a mask for a header map entry, // given the header length and the mask offset (in bytes). This // mask will need to be touched up for non-alphabetic characters. // #define CREATE_HEADER_MASK(hlength, maskoffset) \ ((hlength) > (maskoffset) ? UPCASE_MASK : \ (((maskoffset) - (hlength)) >= 8 ? 0 : \ (UPCASE_MASK >> ( ((maskoffset) - (hlength)) * (ULONGLONG)8)))) // // Macro for creating header map entries. The mask entries are created // by the init code. // #define CREATE_HEADER_MAP_ENTRY(header, ID, auto, serverhandler, clienthandler, HintIndex)\ { \ \ sizeof(#header) - 1, \ ((sizeof(#header) - 1) / 8) + \ (((sizeof(#header) - 1) % 8) == 0 ? 0 : 1), \ (((sizeof(#header) - 1) / 8) + \ (((sizeof(#header) - 1) % 8) == 0 ? 0 : 1)) * 8, \ { #header }, \ { 0, 0, 0}, \ { #header }, \ ID, \ auto, \ serverhandler, \ clienthandler, \ HintIndex \ } // // Macro for defining fast verb table entries. Note that we don't subtrace 1 // from the various sizeof occurences because we'd just have to add it back // in to account for the seperating space. // #define CREATE_FAST_VERB_ENTRY(verb) { {#verb " "}, \ (0xffffffffffffffff >> \ ((8 - (sizeof(#verb))) * 8)), \ (sizeof(#verb)), HttpVerb##verb } // // Macro for defining all verb table entries. // #define CREATE_LONG_VERB_ENTRY(verb) { sizeof(#verb) - 1, \ #verb,\ HttpVerb##verb } #define IS_UTF8_TRAILBYTE(ch) (((ch) & 0xc0) == 0x80) NTSTATUS CheckForAbsoluteUrl( IN PUL_INTERNAL_REQUEST pRequest, IN PUCHAR pURL, IN ULONG URLLength, IN PUCHAR * pHostPtr, IN ULONG * BytesTaken ); NTSTATUS LookupVerb( IN PUL_INTERNAL_REQUEST pRequest, IN PUCHAR pHttpRequest, IN ULONG HttpRequestLength, OUT ULONG * pBytesTaken ); NTSTATUS UlParseHeaderWithHint( IN PUL_INTERNAL_REQUEST pRequest, IN PUCHAR pHttpRequest, IN ULONG HttpRequestLength, IN PHEADER_MAP_ENTRY pHeaderHintMap, OUT ULONG * pBytesTaken ); NTSTATUS UlParseHeader( IN PUL_INTERNAL_REQUEST pRequest, IN PUCHAR pHttpRequest, IN ULONG HttpRequestLength, OUT ULONG * pBytesTaken ); NTSTATUS UlLookupHeader( IN PUL_INTERNAL_REQUEST pRequest, IN PUCHAR pHttpRequest, IN ULONG HttpRequestLength, IN PHEADER_MAP_ENTRY pCurrentHeaderMap, IN ULONG HeaderMapCount, OUT ULONG * pBytesTaken ); typedef enum _URL_PART { Scheme, HostName, AbsPath, QueryString } URL_PART; typedef enum _URL_TYPE { UrlTypeUtf8, UrlTypeAnsi, UrlTypeDbcs } URL_TYPE; NTSTATUS UlpCleanAndCopyUrl( IN URL_PART UrlPart, IN OUT PWSTR pDestination, IN PUCHAR pSource, IN ULONG SourceLength, OUT PULONG pBytesCopied, OUT PWSTR * ppQueryString OPTIONAL, OUT PULONG pUrlHash ); NTSTATUS UlpCleanAndCopyUrlByType( IN URL_TYPE UrlType, IN URL_PART UrlPart, IN OUT PWSTR pDestination, IN PUCHAR pSource, IN ULONG SourceLength, OUT PULONG pBytesCopied, OUT PWSTR * ppQueryString OPTIONAL, OUT PULONG pUrlHash ); NTSTATUS Unescape( IN PUCHAR pChar, OUT PUCHAR pOutChar ); // // PopChar is used only if the string is not UTF-8, or UrlPart != QueryString, // or the current character is '%' or its high bit is set. In all other cases, // the FastPopChars table is used for fast conversion. // __inline NTSTATUS FASTCALL PopChar( IN URL_TYPE UrlType, IN URL_PART UrlPart, IN PUCHAR pChar, OUT WCHAR * pUnicodeChar, OUT PULONG pCharToSkip ) { NTSTATUS Status; WCHAR UnicodeChar; UCHAR Char; UCHAR Trail1; UCHAR Trail2; ULONG CharToSkip; // // Sanity check. // PAGED_CODE(); // // validate it as a valid url character // if (UrlPart != QueryString) { if (IS_URL_TOKEN(pChar[0]) == FALSE) { Status = STATUS_OBJECT_PATH_SYNTAX_BAD; UlTrace(PARSER, ( "ul!PopChar(pChar = %p) first char isn't URL token\n", pChar )); goto end; } } else { // // Allow anything but linefeed in the query string. // if (pChar[0] == LF) { Status = STATUS_OBJECT_PATH_SYNTAX_BAD; UlTrace(PARSER, ( "ul!PopChar(pChar = %p) linefeed in query string\n", pChar )); goto end; } UnicodeChar = (USHORT) pChar[0]; CharToSkip = 1; // skip all the decoding stuff goto slash; } // // need to unescape ? // // can't decode the query string. that would be lossy decodeing // as '=' and '&' characters might be encoded, but have meaning // to the usermode parser. // if (pChar[0] == '%') { Status = Unescape(pChar, &Char); if (NT_SUCCESS(Status) == FALSE) goto end; CharToSkip = 3; } else { Char = pChar[0]; CharToSkip = 1; } if (UrlType == UrlTypeUtf8) { // // convert to unicode, checking for utf8 . // // 3 byte runs are the largest we can have. 16 bits in UCS-2 = // 3 bytes of (4+4,2+6,2+6) where it's code + char. // for a total of 6+6+4 char bits = 16 bits. // // // NOTE: we'll only bother to decode utf if it was escaped // thus the (CharToSkip == 3) // if ((CharToSkip == 3) && ((Char & 0xf0) == 0xe0)) { // 3 byte run // // Unescape the next 2 trail bytes // Status = Unescape(pChar+CharToSkip, &Trail1); if (NT_SUCCESS(Status) == FALSE) goto end; CharToSkip += 3; // %xx Status = Unescape(pChar+CharToSkip, &Trail2); if (NT_SUCCESS(Status) == FALSE) goto end; CharToSkip += 3; // %xx if (IS_UTF8_TRAILBYTE(Trail1) == FALSE || IS_UTF8_TRAILBYTE(Trail2) == FALSE) { // bad utf! // Status = STATUS_OBJECT_PATH_SYNTAX_BAD; UlTrace(PARSER, ( "ul!PopChar( 0x%x 0x%x ) bad trail bytes\n", Trail1, Trail2 )); goto end; } // handle three byte case // 1110xxxx 10xxxxxx 10xxxxxx UnicodeChar = (USHORT) (((Char & 0x0f) << 12) | ((Trail1 & 0x3f) << 6) | (Trail2 & 0x3f)); } else if ((CharToSkip == 3) && ((Char & 0xe0) == 0xc0)) { // 2 byte run // // Unescape the next 1 trail byte // Status = Unescape(pChar+CharToSkip, &Trail1); if (NT_SUCCESS(Status) == FALSE) goto end; CharToSkip += 3; // %xx if (IS_UTF8_TRAILBYTE(Trail1) == FALSE) { // bad utf! // Status = STATUS_OBJECT_PATH_SYNTAX_BAD; UlTrace(PARSER, ( "ul!PopChar( 0x%x ) bad trail byte\n", Trail1 )); goto end; } // handle two byte case // 110xxxxx 10xxxxxx UnicodeChar = (USHORT) (((Char & 0x1f) << 6) | (Trail1 & 0x3f)); } // now this can either be unescaped high-bit (bad) // or escaped high-bit. (also bad) // // thus not checking CharToSkip // else if ((Char & 0x80) == 0x80) { // high bit set ! bad utf! // Status = STATUS_OBJECT_PATH_SYNTAX_BAD; UlTrace(PARSER, ( "ul!PopChar( 0x%x ) ERROR: high bit set! bad utf!\n", Char )); goto end; } // // Normal character (again either escaped or unescaped) // else { // // Simple conversion to unicode, it's 7-bit ascii. // UnicodeChar = (USHORT)Char; } } else // UrlType != UrlTypeUtf8 { UCHAR AnsiChar[2]; ULONG AnsiCharSize; // // Convert ANSI character to Unicode. // If the UrlType is UrlTypeDbcs, then we may have // a DBCS lead/trail pair. // if (UrlType == UrlTypeDbcs && NlsLeadByteInfo[Char]) { // // This is a double-byte character. // AnsiCharSize = 2; AnsiChar[0] = Char; Status = Unescape(pChar+CharToSkip, &AnsiChar[1]); if (!NT_SUCCESS(Status)) { goto end; } CharToSkip += 3; // %xx } else { // // This is a single-byte character. // AnsiCharSize = 1; AnsiChar[0] = Char; } Status = RtlMultiByteToUnicodeN( &UnicodeChar, sizeof(WCHAR), NULL, (PCHAR) &AnsiChar[0], AnsiCharSize ); if (!NT_SUCCESS(Status)) { goto end; } } slash: // // turn backslashes into forward slashes // if (UrlPart != QueryString && UnicodeChar == L'\\') { UnicodeChar = L'/'; } else if (UnicodeChar == UNICODE_NULL) { // // we pop'd a NULL. bad! // Status = STATUS_OBJECT_PATH_SYNTAX_BAD; goto end; } *pCharToSkip = CharToSkip; *pUnicodeChar = UnicodeChar; Status = STATUS_SUCCESS; end: return Status; } // PopChar // Call this only after the entire request has been parsed // NTSTATUS UlpCookUrl( IN PUL_INTERNAL_REQUEST pRequest ); ULONG UlpParseHttpVersion( PUCHAR pString, ULONG StringLength, PHTTP_VERSION pVersion ); #ifdef __cplusplus }; // extern "C" #endif #endif // _PARSEP_H_