509 lines
15 KiB
C++
509 lines
15 KiB
C++
/*--
|
|
Copyright (c) 1995-1998 Microsoft Corporation
|
|
Module Name: PARSER.CPP
|
|
Author: Arul Menezes
|
|
Abstract: HTTP request parser
|
|
--*/
|
|
#include "pch.h"
|
|
#pragma hdrstop
|
|
|
|
#include "httpd.h"
|
|
|
|
// This could be written as a state-machine parser, but for now I'm
|
|
// keeping it simple and slow :-(
|
|
|
|
// ---------------------------------------------------------------------------
// String constants used by the request parser.
// Header names carry their trailing ':'.
// ---------------------------------------------------------------------------

// Request methods
const char cszGET[]  = "GET";
const char cszHEAD[] = "HEAD";
const char cszPOST[] = "POST";

// General headers
const char cszConnection[] = "Connection:";
//const char cszDate[] = "Date:";
//const char cszPragma[] = "Pragma:";

// Request headers
const char cszAuthorization[]   = "Authorization:";
const char cszIfModifiedSince[] = "If-Modified-Since:";
//const char cszReferer[] = "Referer:";
//const char cszUserAgent[] = "User-Agent:";
const char cszCookie[] = "Cookie:";
const char cszAccept[] = "Accept:";

// Entity headers
const char cszContentLength[] = "Content-Length:";
const char cszContentType[]   = "Content-Type:";

// Other tokens: protocol-version prefix and authentication scheme names
// const char cszHTTPVER[] = "HTTP/%d.%d"; //
const char cszHTTPVER[] = "HTTP/";
const char cszBasic[]   = "Basic";
const char cszNTLM[]    = "NTLM";
|
|
|
|
// PFNPARSE(x): address of the member function CHttpRequest::Parse<x>.
// The qualified name is deliberately NOT parenthesized: "&(Class::Member)"
// does not form a pointer to member in standard C++ (only "&Class::Member" does).
#define PFNPARSE(x) &CHttpRequest::Parse##x

// TABLEENTRY(csz, id, pfn): brace initializer for one dispatch-table row:
// header text, its length without the terminating NUL, token id, handler.
// csz must be a char array (not a pointer) so that sizeof yields the string size.
#define TABLEENTRY(csz, id, pfn) { csz, sizeof(csz)-1, id, PFNPARSE(pfn) }

// no more filter calls to SF_AUTH after the 1st one in a session
#define AUTH_FILTER_DONE 0x1000
|
|
|
|
typedef (CHttpRequest::*PFNPARSEPROC)(PCSTR pszTok, TOKEN idHeader);
|
|
|
|
typedef struct tagHeaderDesc
|
|
{
|
|
const char* sz;
|
|
int iLen;
|
|
TOKEN id;
|
|
PFNPARSEPROC pfn;
|
|
} HEADERDESC;
|
|
|
|
|
|
// Dispatch table mapping recognized header names to their parse handlers.
// ParseHeaders scans it linearly and stops at the all-zero sentinel row;
// headers with no row here (Date, Pragma, Referer, User-Agent,
// Content-Encoding, ...) are skipped by ParseHeaders.
// The request methods (GET/HEAD/POST) are not table driven; ParseMethod
// handles them.
const HEADERDESC rgHeaders[] =
{
    // General headers
    TABLEENTRY(cszConnection,      TOK_CONNECTION, Connection),

    // Request headers
    TABLEENTRY(cszCookie,          TOK_COOKIE,     Cookie),
    TABLEENTRY(cszAccept,          TOK_ACCEPT,     Accept),
    TABLEENTRY(cszAuthorization,   TOK_AUTH,       Authorization),
    TABLEENTRY(cszIfModifiedSince, TOK_IFMOD,      IfModifiedSince),

    // Entity headers
    TABLEENTRY(cszContentType,     TOK_TYPE,       ContentType),
    TABLEENTRY(cszContentLength,   TOK_LENGTH,     ContentLength),

    // Sentinel: sz == 0 ends the scan, pfn == 0 means "no handler"
    { 0, 0, (TOKEN)0, 0 }
};
|
|
|
|
// Parse all the headers, line by line
|
|
BOOL CHttpRequest::ParseHeaders()
|
|
{
|
|
DEBUG_CODE_INIT;
|
|
PSTR pszTok;
|
|
PWSTR pwszTemp;
|
|
PSTR pszPathInfo = NULL;
|
|
int i, iLen;
|
|
BOOL ret = FALSE;
|
|
|
|
|
|
if (!m_bufRequest.NextTokenWS(&pszTok, &iLen))
|
|
{
|
|
m_rs = STATUS_BADREQ;
|
|
myleave(287);
|
|
}
|
|
|
|
if (! ParseMethod(pszTok,iLen))
|
|
{
|
|
m_rs = STATUS_BADREQ;
|
|
myleave(288);
|
|
}
|
|
|
|
if (!m_bufRequest.NextLine())
|
|
{
|
|
m_rs = STATUS_BADREQ;
|
|
myleave(290);
|
|
}
|
|
|
|
// outer-loop. one header per iteration
|
|
while (m_bufRequest.NextTokenColon(&pszTok, &iLen))
|
|
{
|
|
// compare token with tokens in table
|
|
for (i=0; rgHeaders[i].sz; i++)
|
|
{
|
|
//TraceTag(ttidWebServer, "Comparing %s %d %d", rgHeaders[i].sz, rgHeaders[i].iLen, rgHeaders[i].pfn);
|
|
if ( (rgHeaders[i].iLen == iLen) &&
|
|
0==_memicmp(rgHeaders[i].sz, pszTok, iLen) )
|
|
break;
|
|
}
|
|
if (rgHeaders[i].pfn)
|
|
{
|
|
TraceTag(ttidWebServer, "Parsing %s", rgHeaders[i].sz);
|
|
// call the specific function to parse this header.
|
|
if (! ((this->*(rgHeaders[i].pfn))(pszTok, rgHeaders[i].id)) )
|
|
{
|
|
TraceTag(ttidWebServer, "Parser: failed to parse %s -- IGNORING", rgHeaders[i].sz);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
TraceTag(ttidWebServer, "Ignoring header %s", pszTok);
|
|
}
|
|
if (!m_bufRequest.NextLine())
|
|
{
|
|
m_rs = STATUS_BADREQ;
|
|
myleave(290);
|
|
}
|
|
}
|
|
|
|
if (!m_bufRequest.NextLine()) // eat the blank line
|
|
{
|
|
m_rs = STATUS_BADREQ;
|
|
myleave(290);
|
|
}
|
|
TraceTag(ttidWebServer, "Parser: DONE");
|
|
|
|
// check what we got
|
|
if (!m_pszMethod || !m_idMethod)
|
|
{
|
|
TraceTag(ttidWebServer, "Parser: missing URL or method, illformatted Request-line");
|
|
m_rs = STATUS_BADREQ;
|
|
myleave(291);
|
|
}
|
|
|
|
// Once we've read the request line, give filter shot at modifying the
|
|
// remaining headers.
|
|
if (g_pVars->m_fFilters &&
|
|
! CallFilter(SF_NOTIFY_PREPROC_HEADERS))
|
|
myleave(292);
|
|
|
|
|
|
m_wszPath = g_pVars->m_pVroots->URLAtoPathW(m_pszURL, &m_dwPermissions, &m_AuthLevelReqd,&m_VRootScriptType,&m_pszPathInfo,&m_wszVRootUserList);
|
|
|
|
if (g_pVars->m_fFilters &&
|
|
! CallFilter(SF_NOTIFY_URL_MAP))
|
|
myleave(293);
|
|
|
|
// get extension
|
|
if (m_wszPath && (pwszTemp = wcsrchr(m_wszPath, '.')))
|
|
m_wszExt = MySzDupW(pwszTemp);
|
|
|
|
|
|
// As per the docs, the filter gets ONLY 1 call per session to notify
|
|
// it of this event. m_dwAuthFlags is remembered from session to session.
|
|
|
|
// Like IIS, it always is called, even if Vroots is AUTH_PUBLIC already and
|
|
// even if no security has been enabled.
|
|
|
|
if ( g_pVars->m_fFilters && ! (m_dwAuthFlags & AUTH_FILTER_DONE))
|
|
{
|
|
if ( ! AuthenticateFilter())
|
|
myleave(294);
|
|
}
|
|
m_dwAuthFlags |= AUTH_FILTER_DONE;
|
|
|
|
ret = TRUE;
|
|
done:
|
|
TraceTag(ttidWebServer, "Parse headers failed, err = %d",err);
|
|
return ret;
|
|
}
|
|
|
|
// Parses the request line: records the method, then reads the rest of the
// line as "<URL> [HTTP/x.y]", cracks the URL and records the HTTP version.
//
// pszMethod  method token; must be NUL terminated (MySzDupA below already
//            relies on that).
// cbMethod   length of the method token. Kept for interface compatibility;
//            the comparison no longer depends on it (see below).
//
// Returns TRUE on success; FALSE when the rest of the request line cannot be
// read or the URL cannot be cracked.
BOOL CHttpRequest::ParseMethod(PCSTR pszMethod, int cbMethod)
{
    DEBUG_CODE_INIT;
    PSTR pszTok = NULL;
    PSTR pszVersion = NULL;
    int  iLen = 0;
    BOOL fUrlOk = FALSE;
    BOOL ret = FALSE;

    // save method
    m_pszMethod = MySzDupA(pszMethod);

    // Exact, case-sensitive match. The previous memcmp(literal, pszMethod,
    // cbMethod) accepted any prefix ("GE" was treated as GET) and read past
    // the end of the literal when the token was longer than it.
    if (0 == strcmp(cszGET, pszMethod))
        m_idMethod = TOK_GET;
    else if (0 == strcmp(cszHEAD, pszMethod))
        m_idMethod = TOK_HEAD;
    else if (0 == strcmp(cszPOST, pszMethod))
        m_idMethod = TOK_POST;
    else
        m_idMethod = TOK_UNKNOWN_VERB;

    // get URL and HTTP/x.y together (allows for spaces in URL like Netscape sends)
    if (!m_bufRequest.NextTokenEOL(&pszTok, &iLen))
        myretleave(FALSE, 201);

    // separate out the HTTP/x.y: everything after the LAST space is the
    // version. The space is temporarily overwritten with a NUL so that the
    // URL is a standalone string.
    pszVersion = strrchr(pszTok, ' ');
    if (pszVersion)
    {
        *pszVersion = 0;
        iLen = (INT)((INT_PTR)(pszVersion - pszTok));
        pszVersion++;
    }

    // clean up & parse the URL. The result is acted on only after the
    // request buffer has been restored below.
    fUrlOk = MyCrackURL(pszTok, iLen);

    // get version (optional. HTTP 0.9 wont have this)
    if (!pszVersion)
    {
        m_dwVersion = MAKELONG(9, 0);
    }
    else
    {
        SetHTTPVersion(pszVersion, &m_dwVersion);
        pszVersion[-1] = ' '; // reset this to a space
    }

    // Without a cracked URL m_pszURL is NULL and the request cannot be served
    if (!fUrlOk)
        myretleave(FALSE, 202);

    ret = TRUE;

done:
    TraceTag(ttidWebServer, "end ParseMethod (iGLE=%d iErr=%d)", GLE(err), err);
    return ret;
}
|
|
|
|
// We assume a raw URL in the form that we receive in the HTTP headers (no scheme, port number etc)
|
|
// We extract the path, extra-path, and query
|
|
BOOL CHttpRequest::MyCrackURL(PSTR pszRawURL, int iLen)
|
|
{
|
|
DEBUG_CODE_INIT;
|
|
BOOL ret = FALSE;
|
|
PSTR pszDecodedURL=0, pszTemp=0, pszPartiallyDecodedURL=0;
|
|
int iLen2;
|
|
DWORD cchDecodedURL = iLen + 1; // including the NULL terminator
|
|
DWORD cchPartiallyDecodedURL = iLen + 1;
|
|
|
|
// decode URL (convert escape sequences etc)
|
|
if (NULL == (pszPartiallyDecodedURL = MyRgAllocNZ(CHAR, cchDecodedURL)))
|
|
myleave(382);
|
|
if (NULL == (pszDecodedURL = MyRgAllocNZ(CHAR, cchPartiallyDecodedURL)))
|
|
myleave(382);
|
|
|
|
// BUG FIX 393235 - When InternetCanonicalizeUrlA() is told to decode a URL and process the meta
|
|
// directories, it does them in the wrong order. Passing it:
|
|
// http://localhost:2869/upnphost/%2e./%2e./%2e./%2e./%2e./%2e./boot.ini
|
|
// results in:
|
|
// http://localhost:2869/upnphost/../../../../../../boot.ini
|
|
// which is clearly not safe. To work around this, we call it twice - once to decode the URL, and
|
|
// a second time to process the meta directories.
|
|
|
|
// First, decode the URL
|
|
if (!InternetCanonicalizeUrlA(pszRawURL,
|
|
pszPartiallyDecodedURL,
|
|
(DWORD*)&cchPartiallyDecodedURL,
|
|
ICU_NO_ENCODE | ICU_DECODE | ICU_BROWSER_MODE | ICU_NO_META))
|
|
{
|
|
TraceTag(ttidWebServer, "CHttpRequest::MyCrackURL - InternetCanonicalizeUrlA failed with GLE=%d\n", GetLastError());
|
|
myleave(383);
|
|
}
|
|
|
|
// Second, process the meta directories
|
|
if (!InternetCanonicalizeUrlA(pszPartiallyDecodedURL,
|
|
pszDecodedURL,
|
|
(DWORD*)&cchDecodedURL,
|
|
ICU_NO_ENCODE | ICU_BROWSER_MODE))
|
|
{
|
|
TraceTag(ttidWebServer, "CHttpRequest::MyCrackURL - InternetCanonicalizeUrlA failed with GLE=%d\n", GetLastError());
|
|
myleave(384);
|
|
}
|
|
|
|
|
|
// get query string
|
|
if (pszTemp = strchr(pszDecodedURL, '?'))
|
|
{
|
|
m_pszQueryString = MySzDupA(pszTemp+1);
|
|
*pszTemp = 0;
|
|
}
|
|
|
|
|
|
// Searching for an embedded ISAPI dll name, ie /wwww/isapi.dll/a/b.
|
|
// We load the file /www/isapi.dll and set PATH_INFO to /a/b
|
|
// Emebbed ASP file names are handled similiarly.
|
|
if (g_pVars->m_fExtensions)
|
|
{
|
|
if (pszTemp = strstr(pszDecodedURL,".dll/"))
|
|
{
|
|
m_pszPathInfo = MySzDupA(pszTemp + sizeof(".dll/") - 2);
|
|
pszTemp[sizeof(".dll/") - 2] = 0;
|
|
}
|
|
else if (pszTemp = strstr(pszDecodedURL,".asp/"))
|
|
{
|
|
m_pszPathInfo = MySzDupA(pszTemp + sizeof(".asp/") - 2);
|
|
pszTemp[sizeof(".asp/") - 2] = 0;
|
|
}
|
|
}
|
|
|
|
// save a copy of the cleaned up URL (MINUS query!)
|
|
// SPECIAL HACK: alloc one extra char in case we have to send a redirect back (see request.cpp)
|
|
|
|
iLen2 = strlen(pszDecodedURL);
|
|
m_pszURL = MySzAllocA(1+iLen2);
|
|
Nstrcpy(m_pszURL, pszDecodedURL, iLen2); // copy null-term too.
|
|
|
|
ret = TRUE;
|
|
done:
|
|
MyFree(pszDecodedURL);
|
|
MyFree(pszPartiallyDecodedURL);
|
|
TraceTag(ttidWebServer, "end MyCrackURL(%s) path=%s ext=%s query=%s (iGLE=%d iErr=%d)\r\n",
|
|
pszRawURL, m_wszPath, m_wszExt, m_pszQueryString, GLE(err), err);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
// Handler for the "Content-Length:" header.
//
// Reads the first token after the header name and stores it in
// m_dwContentLength. The value comes straight from the client, so it is
// parsed strictly: it must start with a decimal digit and must fit in a
// DWORD. atoi() was used before, which let "-1" wrap around to a huge
// unsigned length. As with atoi, parsing stops at the first non-digit.
//
// pszMethod, id  unused; present to match PFNPARSEPROC.
//
// Returns TRUE when the header is absent/empty or was parsed; FALSE when the
// value is not a valid length (m_dwContentLength is then left unchanged; the
// caller traces and ignores the header).
BOOL CHttpRequest::ParseContentLength(PCSTR pszMethod, TOKEN id)
{
    PSTR pszTok = 0;
    int  iLen = 0;

    // get length (first token after "Content-Length:")
    if (m_bufRequest.NextTokenWS(&pszTok, &iLen) && pszTok && iLen)
    {
        const DWORD dwMax = (DWORD)-1;
        DWORD dwLen = 0;
        PCSTR psz = pszTok;

        if (*psz < '0' || *psz > '9')
            return FALSE;   // sign, blank or other junk: not a length

        for (; *psz >= '0' && *psz <= '9'; psz++)
        {
            const DWORD dwDigit = (DWORD)(*psz - '0');

            // dwLen * 10 + dwDigit would exceed dwMax
            if (dwLen > (dwMax - dwDigit) / 10)
                return FALSE;

            dwLen = dwLen * 10 + dwDigit;
        }

        m_dwContentLength = dwLen;
    }

    return TRUE;
}
|
|
|
|
// Handler for the "Cookie:" header: stores a copy of the rest of the line in
// m_pszCookie. When the header occurs more than once the last one wins.
//
// pszMethod, id  unused; present to match PFNPARSEPROC.
//
// Always returns TRUE; a missing or empty value leaves m_pszCookie untouched.
BOOL CHttpRequest::ParseCookie(PCSTR pszMethod, TOKEN id)
{
    PSTR pszTok = 0;
    int  iLen = 0;

    // get cookie (up to \r\n after "Cookie:")
    if (m_bufRequest.NextTokenEOL(&pszTok, &iLen) && pszTok && iLen)
    {
        // Release the copy made for an earlier Cookie header of this request,
        // otherwise it is leaked when the pointer is overwritten.
        // NOTE(review): assumes MyFree is the deallocator matching MySzDupA
        // and tolerates NULL (it is called on possibly-NULL pointers in
        // MyCrackURL) -- confirm.
        MyFree(m_pszCookie);
        m_pszCookie = MySzDupA(pszTok);
    }

    return TRUE;
}
|
|
|
|
// Handler for the "Accept:" header: stores a copy of the rest of the line in
// m_pszAccept. When the header occurs more than once the last one wins.
//
// pszMethod, id  unused; present to match PFNPARSEPROC.
//
// Always returns TRUE; a missing or empty value leaves m_pszAccept untouched.
BOOL CHttpRequest::ParseAccept(PCSTR pszMethod, TOKEN id)
{
    PSTR pszTok = 0;
    int  iLen = 0;

    // get the accept list (up to \r\n after "Accept:")
    if (m_bufRequest.NextTokenEOL(&pszTok, &iLen) && pszTok && iLen)
    {
        // Release the copy made for an earlier Accept header of this request,
        // otherwise it is leaked when the pointer is overwritten.
        // NOTE(review): assumes MyFree is the deallocator matching MySzDupA
        // and tolerates NULL (it is called on possibly-NULL pointers in
        // MyCrackURL) -- confirm.
        MyFree(m_pszAccept);
        m_pszAccept = MySzDupA(pszTok);
    }

    return TRUE;
}
|
|
|
|
|
|
// Handler for the "Content-Type:" header: stores a copy of the first
// whitespace-delimited token of the value in m_pszContentType. When the
// header occurs more than once the last one wins.
//
// pszMethod, id  unused; present to match PFNPARSEPROC.
//
// Always returns TRUE; a missing or empty value leaves m_pszContentType
// untouched.
BOOL CHttpRequest::ParseContentType(PCSTR pszMethod, TOKEN id)
{
    PSTR pszTok = 0;
    int  iLen = 0;

    // get type (first token after "Content-Type:")
    if (m_bufRequest.NextTokenWS(&pszTok, &iLen) && pszTok && iLen)
    {
        // Release the copy made for an earlier Content-Type header of this
        // request, otherwise it is leaked when the pointer is overwritten.
        // NOTE(review): assumes MyFree is the deallocator matching MySzDupA
        // and tolerates NULL (it is called on possibly-NULL pointers in
        // MyCrackURL) -- confirm.
        MyFree(m_pszContentType);
        m_pszContentType = MySzDupA(pszTok);
    }

    return TRUE;
}
|
|
|
|
// scanf-style pattern for an If-Modified-Since value: a skipped 3-letter
// weekday, then day, month name, year, hh:mm:ss, "GMT" and a "length=" field.
// In the part of the file visible here it is referenced only by the
// commented-out sscanf call in ParseIfModifiedSince.
const char cszDateParseFmt[] = " %*3s, %02hd %3s %04hd %02hd:%02hd:%02hd GMT; length=%d";
|
|
|
|
// Handler for the "If-Modified-Since:" header.
//
// Reads the rest of the line, lets SetHTTPDate split it into date fields
// (also filling m_dwIfModifiedLength), maps the month name to its index in
// rgMonth and stores the resulting FILETIME in m_ftIfModifiedSince.
//
// pszMethod, id  unused; present to match PFNPARSEPROC.
//
// Returns TRUE only when the date was parsed AND converted successfully;
// m_ftIfModifiedSince is written only in that case. Returns FALSE otherwise
// (the caller traces and ignores the header).
BOOL CHttpRequest::ParseIfModifiedSince(PCSTR pszMethod, TOKEN id)
{
    PSTR       pszTok = 0;
    int        iLen = 0;
    int        i = 0;
    char       szMonth[10];
    SYSTEMTIME st;
    FILETIME   ft;

    ZEROMEM(&st);
    szMonth[0] = 0;   // it is traced with %s below even if SetHTTPDate never fills it

    // get the date (rest of line after If-Modified-Since)
    // BUGBUG: Note we are handling only one date format (the "recommended" one)
    if (!m_bufRequest.NextTokenEOL(&pszTok, &iLen) || !pszTok || !iLen)
        return FALSE;

    // i = sscanf(pszTok, cszDateParseFmt, &st.wDay, &szMonth, &st.wYear, &st.wHour, &st.wMinute, &st.wSecond, &m_dwIfModifiedLength);
    if (SetHTTPDate(pszTok, szMonth, &st, &m_dwIfModifiedLength))
    {
        // try to match month
        for (i = 0; rgMonth[i]; i++)
        {
            if (0 == strcmpi(szMonth, rgMonth[i]))
            {
                st.wMonth = (WORD)i;

                // convert to filetime & store. The conversion fails for an
                // impossible date, so its result decides the return value and
                // the member is only written on success.
                if (SystemTimeToFileTime(&st, &ft))
                {
                    m_ftIfModifiedSince = ft;
                    return TRUE;
                }
                break;
            }
        }
    }

    TraceTag(ttidWebServer, "Failed to parse If-Modified-Since(%s) Parsed: day=%02d month=%s(%d) year=%04d time=%02d:%02d:%02d len=%d\r\n",
             pszTok, st.wDay, szMonth, i, st.wYear, st.wHour, st.wMinute, st.wSecond, m_dwIfModifiedLength);

    return FALSE;
}
|
|
|
|
|
|
|
|
// Note: No filter calls to SF_NOTIFY_AUTHENT in this fcn
|
|
BOOL CHttpRequest::ParseAuthorization(PCSTR pszMethod, TOKEN id)
|
|
{
|
|
DEBUG_CODE_INIT;
|
|
BOOL ret = FALSE;
|
|
PSTR pszTok=0;
|
|
int iLen=0;
|
|
|
|
// get the auth scheme (first token after "Authorization;")
|
|
if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
|
|
myretleave(FALSE, 91);
|
|
|
|
m_pszAuthType = MySzDupA(pszTok);
|
|
|
|
if (g_pVars->m_fBasicAuth && 0==strcmpi(pszTok, cszBasic))
|
|
{
|
|
// get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
|
|
if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
|
|
myretleave(FALSE, 92);
|
|
|
|
|
|
if (!HandleBasicAuth(pszTok, &m_pszRemoteUser, &m_pszPassword,
|
|
&m_AuthLevelGranted, &m_NTLMState,m_wszVRootUserList))
|
|
myretleave(TRUE, 93);
|
|
|
|
TraceTag(ttidWebServer, "Basic Auth SUCCESS");
|
|
m_dwAuthFlags |= m_AuthLevelGranted;
|
|
ret = TRUE;
|
|
}
|
|
|
|
else if (g_pVars->m_fNTLMAuth && 0==strcmpi(pszTok, cszNTLM))
|
|
{
|
|
// get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
|
|
if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
|
|
myretleave(FALSE, 95);
|
|
|
|
if (!HandleNTLMAuth(pszTok))
|
|
myretleave(TRUE, 96);
|
|
|
|
TraceTag(ttidWebServer, "NTLM Auth SUCCESS");
|
|
ret = TRUE;
|
|
}
|
|
|
|
// We read in this data anyway. A filter could theoretically set an Access-denied
|
|
// even if neither NTLM or basic weren't set. AuthenticateFilter will handle
|
|
// this data later in that case.
|
|
// We store data in m_pszRawRemoteUser because it hasn't been Base64 decoded yet
|
|
else
|
|
{
|
|
// get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
|
|
if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
|
|
myretleave(FALSE, 97);
|
|
|
|
|
|
m_pszRawRemoteUser = MySzDupA(pszTok);
|
|
if (NULL == m_pszRemoteUser)
|
|
myretleave(FALSE, 98);
|
|
|
|
TraceTag(ttidWebServer, "Unknown authorization type requested OR requested type not enabled");
|
|
}
|
|
|
|
done:
|
|
TraceTag(ttidWebServer, "Auth FAILED (err=%d ret=%d)", err, ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
// Handler for the "Connection:" header: sets m_fKeepAlive when the first
// token of the value equals cszKeepAlive (compared case-insensitively).
// The flag is never cleared here.
//
// pszMethod, id  unused; present to match PFNPARSEPROC.
//
// Always returns TRUE.
BOOL CHttpRequest::ParseConnection(PCSTR pszMethod, TOKEN id)
{
    PSTR pszValue = 0;
    int  cchValue = 0;

    // first token after "Connection:"
    const BOOL fHaveValue =
        m_bufRequest.NextTokenWS(&pszValue, &cchValue) && pszValue && cchValue;

    if (fHaveValue && 0 == strcmpi(pszValue, cszKeepAlive))
    {
        m_fKeepAlive = TRUE;
    }

    return TRUE;
}
|