windows-nt/Source/XPSP1/NT/inetsrv/iis/ui/itools/linkchk/linkload.cpp

528 lines
11 KiB
C++
Raw Normal View History

2020-09-26 03:20:57 -05:00
/*++
Copyright (c) 1996 Microsoft Corporation
Module Name :
linkload.cpp
Abstract:
Link loader class definitions. It uses wininet API
to load the web page from the internet.
Author:
Michael Cheuk (mcheuk) 22-Nov-1996
Project:
Link Checker
Revision History:
--*/
#include "stdafx.h"
#include "linkload.h"
#include "link.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
// Constants
// Redirect limit that LoadHTTP() compares against its iRedirectCount argument
const int iMaxRedirectCount_c = 3;
// Element count of the buffer LoadHTTP() hands to InternetReadFile()
const UINT nReadFileBufferSize_c = 4096;
// Element count of the buffer LoadHTTP() uses for HttpQueryInfo() results
const UINT nQueryResultBufferSize_c = 1024;
BOOL
CLinkLoader::Create(
    const CString& strUserAgent,
    const CString& strAdditonalHeaders
    )
/*++

Routine Description:

    One time link loader create function. Saves the additional HTTP
    headers and opens the internet session that the load functions use.

Arguments:

    strUserAgent        - HTTP user agent name
    strAdditonalHeaders - additional HTTP headers, stored in
                          m_strAdditionalHeaders

Return Value:

    BOOL - TRUE if the internet session was opened. FALSE if wininet.dll
           is not loaded or the session could not be opened.

--*/
{
    // Make sure wininet.dll is loaded
    ASSERT(CWininet::IsLoaded());
    if(!CWininet::IsLoaded())
    {
        return FALSE;
    }

    // Save the additional header
    m_strAdditionalHeaders = strAdditonalHeaders;

    // Open an internet session. The fourth argument is the proxy bypass
    // string, not a port number, so pass NULL rather than a port constant.
    m_hInternetSession = CWininet::InternetOpenA(
                            strUserAgent,               // lpszAgent
                            PRE_CONFIG_INTERNET_ACCESS, // dwAccessType
                            NULL,                       // lpszProxy
                            NULL,                       // lpszProxyBypass
                            0);                         // dwFlags

#ifdef _DEBUG
    if(!m_hInternetSession)
    {
        TRACE(_T("CLinkLoader::Create() - InternetOpen() failed. GetLastError() = %d\n"),
            GetLastError());
    }
#endif

    return (m_hInternetSession != NULL);

} // CLinkLoader::Create
BOOL
CLinkLoader::ChangeProperties(
    const CString& strUserAgent,
    const CString& strAdditionalHeaders
    )
/*++

Routine Description:

    Replace the loader properties by closing the current internet
    session and creating a new one with the given settings.

Arguments:

    strUserAgent         - HTTP user agent name
    strAdditionalHeaders - additional HTTP headers

Return Value:

    BOOL - result of Create() when a session exists. FALSE when there
           is no session to replace.

--*/
{
    // Without an existing session there is nothing to change
    if(!m_hInternetSession)
    {
        return FALSE;
    }

    // Drop the old session, then rebuild it through Create()
    VERIFY(CWininet::InternetCloseHandle(m_hInternetSession));

    return Create(strUserAgent, strAdditionalHeaders);

} // CLinkLoader::ChangeProperties
BOOL
CLinkLoader::Load(
    CLink& link,
    BOOL fReadFile
    )
/*++

Routine Description:

    Load a web link. The link URL is cracked into host name and URL
    path, then the load is dispatched on the URL scheme: HTTP goes to
    LoadHTTP(), the other schemes in the FTP..HTTPS range go to
    LoadURL(), and anything else is marked as unsupported.

Arguments:

    link      - reference to the result link object
    fReadFile - read the file and save it in the link object
                (only forwarded to LoadHTTP())

Return Value:

    BOOL - TRUE if success. FALSE otherwise (no session, the URL cannot
           be cracked, unsupported scheme, or the load itself failed).

--*/
{
    // Make sure we have a session available
    ASSERT(m_hInternetSession);
    if(!m_hInternetSession)
    {
        return FALSE;
    }

    // Crack the URL
    TCHAR szHostName[INTERNET_MAX_HOST_NAME_LENGTH];
    TCHAR szUrlPath[INTERNET_MAX_URL_LENGTH];
    szHostName[0] = _T('\0');
    szUrlPath[0] = _T('\0');

    URL_COMPONENTS urlcomp;
    memset(&urlcomp, 0, sizeof(urlcomp));
    urlcomp.dwStructSize = sizeof(urlcomp);
    urlcomp.lpszHostName = (LPTSTR) szHostName;
    urlcomp.dwHostNameLength = INTERNET_MAX_HOST_NAME_LENGTH;
    urlcomp.lpszUrlPath = (LPTSTR) szUrlPath;
    urlcomp.dwUrlPathLength = INTERNET_MAX_URL_LENGTH;

    if(!CWininet::InternetCrackUrlA(link.GetURL(), link.GetURL().GetLength(), 0, &urlcomp))
    {
        TRACE(_T("CLinkLoader::Load() - InternetCrackUrl() failed. GetLastError() = %d\n"),
            GetLastError());
        return FALSE;
    }

    // Make sure we have a valid (non zero length) URL path. A URL such as
    // http://hostname cracks to an empty path, so request the root instead.
    // (The buffer is written directly; a printf-family call to stdout
    // would leave the path empty.)
    if(szUrlPath[0] == _T('\0'))
    {
        szUrlPath[0] = _T('/');
        szUrlPath[1] = _T('\0');
    }

    // Call the appropriate load function for different URL schemes
    if(urlcomp.nScheme == INTERNET_SCHEME_HTTP)
    {
        // NOTE(review): urlcomp.nPort is not forwarded; LoadHTTP() connects
        // with INTERNET_INVALID_PORT_NUMBER - confirm this is intended for
        // URLs that carry an explicit port.
        return LoadHTTP(link, fReadFile, szHostName, szUrlPath);
    }

    if(urlcomp.nScheme >= INTERNET_SCHEME_FTP &&
       urlcomp.nScheme <= INTERNET_SCHEME_HTTPS)
    {
        return LoadURL(link);
    }

    TRACE(_T("CLinkLoader::Load() - unsupport URL scheme(%d)\n"), urlcomp.nScheme);
    link.SetState(CLink::eUnsupport);
    return FALSE;

} // CLinkLoader::Load
BOOL
CLinkLoader::LoadURL(
    CLink& link
    )
/*++

Routine Description:

    Check a non-HTTP link by opening its URL through InternetOpenUrl.
    The link state is set to CLink::eValidURL when the open succeeds;
    otherwise the failure is recorded through WininetFailed().

Arguments:

    link - reference to the result link object

Return Value:

    BOOL - TRUE if the URL could be opened. Otherwise the value
           returned by WininetFailed().

--*/
{
    // Every scheme other than HTTP is probed with InternetOpenUrl
    CAutoInternetHandle hOpenURL;
    hOpenURL = CWininet::InternetOpenUrlA(
                    m_hInternetSession,
                    link.GetURL(),
                    NULL,
                    0,
                    INTERNET_FLAG_DONT_CACHE,
                    0);

    // Failure path first: record the wininet error in the link
    if(!hOpenURL)
    {
        TRACE(_T("CLinkLoader::LoadURL() - InternetOpenUrlA() failed."));
        return WininetFailed(link);
    }

    // The URL opened, so the link is good
    link.SetState(CLink::eValidURL);
    return TRUE;

} // CLinkLoader::LoadURL
BOOL
CLinkLoader::LoadHTTP(
    CLink& link,
    BOOL fReadFile,
    LPCTSTR lpszHostName,
    LPCTSTR lpszUrlPath,
    int iRedirectCount /* = 0 */
    )
/*++

Routine Description:

    Load a HTTP link. Sends a GET request (auto redirect disabled) and
    inspects the status code:

      - 301 / 302: the Location header is cracked and LoadHTTP() calls
        itself for the new location, up to iMaxRedirectCount_c times.
      - not 2xx:   the link is marked CLink::eInvalidHTTP and the status
        text is stored when available.
      - 2xx:       the link is marked CLink::eValidHTTP. If fReadFile is
        set and the content type contains "text/html", the body is read
        and stored in the link object.

    Any wininet API failure is recorded through WininetFailed().

Arguments:

    link           - reference to the result link object
    fReadFile      - read the file and save it in the link object
    lpszHostName   - hostname
    lpszUrlPath    - URL path
    iRedirectCount - Looping count. It is used to keep track of the
                     number of redirections for the current link.

Return Value:

    BOOL - TRUE if success. FALSE otherwise. When the redirect limit is
           reached FALSE is returned without changing the link state.

--*/
{
    // Open an http session
    CAutoInternetHandle hHttpSession;
    hHttpSession = CWininet::InternetConnectA(
                        m_hInternetSession,             // hInternetSession
                        lpszHostName,                   // lpszServerName
                        INTERNET_INVALID_PORT_NUMBER,   // nServerPort
                        _T(""),                         // lpszUsername
                        _T(""),                         // lpszPassword
                        INTERNET_SERVICE_HTTP,          // dwService
                        0,                              // dwFlags
                        0);                             // dwContext
    if(!hHttpSession)
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - InternetConnect() failed."));
        return WininetFailed(link);
    }

    // Open an http request
    CAutoInternetHandle hHttpRequest;
    hHttpRequest = CWininet::HttpOpenRequestA(
                        hHttpSession,           // hHttpSession
                        _T("GET"),              // lpszVerb
                        lpszUrlPath,            // lpszObjectName
                        HTTP_VERSION,           // lpszVersion
                        link.GetBase(),         // lpszReferer
                        NULL,                   // lpszAcceptTypes
                        INTERNET_FLAG_NO_AUTO_REDIRECT | INTERNET_FLAG_DONT_CACHE, // dwFlags
                        0);                     // dwContext
    if(!hHttpRequest)
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpOpenRequest() failed."));
        return WininetFailed(link);
    }

    // Send the http request
    if(!CWininet::HttpSendRequestA(
            hHttpRequest,           // hHttpRequest
            m_strAdditionalHeaders, // lpszHeaders
            (DWORD)-1,              // dwHeadersLength
            0,                      // lpOptional
            0))                     // dwOptionalLength
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpSendRequest() failed."));
        return WininetFailed(link);
    }

    TCHAR szQueryResult[nQueryResultBufferSize_c];
    DWORD dwQueryLength = sizeof(szQueryResult);

    // Check the result status code
    if(!CWininet::HttpQueryInfoA(
            hHttpRequest,           // hHttpRequest
            HTTP_QUERY_STATUS_CODE, // dwInfoLevel
            szQueryResult,          // lpvBuffer
            &dwQueryLength,         // lpdwBufferLength
            NULL))                  // lpdwIndex
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
        return WininetFailed(link);
    }

    const int iStatusCode = _ttoi(szQueryResult);

    // Check for 301 Move Permanently or 302 Move Temporarily
    if(iStatusCode == 301 || iStatusCode == 302)
    {
        // We can only redirect iMaxRedirectCount_c times. iRedirectCount
        // is the number of redirections already followed, so stop as soon
        // as it reaches the limit.
        if(iRedirectCount >= iMaxRedirectCount_c)
        {
            TRACE(_T("CLinkLoader::LoadHTTP() - too many redirections.\n"));
            return FALSE;
        }

        // Get the new location
        dwQueryLength = sizeof(szQueryResult);
        if(!CWininet::HttpQueryInfoA(
                hHttpRequest,           // hHttpRequest
                HTTP_QUERY_LOCATION,    // dwInfoLevel
                szQueryResult,          // lpvBuffer
                &dwQueryLength,         // lpdwBufferLength
                NULL))                  // lpdwIndex
        {
            TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
            return WininetFailed(link);
        }

        // We only update the URL in link object if
        // we are redirecting from http://hostname/xyz to http://hostname/xyz/
        if(link.GetURL().GetLength() + 1 == (int)dwQueryLength &&
           link.GetURL().GetAt(link.GetURL().GetLength() - 1) != _TCHAR('/') &&
           szQueryResult[dwQueryLength - 1] == _TCHAR('/'))
        {
            link.SetURL(szQueryResult);
        }

        // Crack the new location & call LoadHTTP again
        TCHAR szHostName[INTERNET_MAX_HOST_NAME_LENGTH];
        TCHAR szUrlPath[INTERNET_MAX_URL_LENGTH];
        szHostName[0] = _T('\0');
        szUrlPath[0] = _T('\0');

        URL_COMPONENTS urlcomp;
        memset(&urlcomp, 0, sizeof(urlcomp));
        urlcomp.dwStructSize = sizeof(urlcomp);
        urlcomp.lpszHostName = (LPTSTR) szHostName;
        urlcomp.dwHostNameLength = INTERNET_MAX_HOST_NAME_LENGTH;
        urlcomp.lpszUrlPath = (LPTSTR) szUrlPath;
        urlcomp.dwUrlPathLength = INTERNET_MAX_URL_LENGTH;

        // The Location value comes from the server and may not be a URL
        // that can be cracked. Treat that as a failure in every build
        // instead of continuing with unusable host name / path buffers.
        if(!CWininet::InternetCrackUrlA(szQueryResult, dwQueryLength, 0, &urlcomp))
        {
            TRACE(_T("CLinkLoader::LoadHTTP() - InternetCrackUrl() failed."));
            return WininetFailed(link);
        }

        // A location without a path means the root of the host
        if(szUrlPath[0] == _T('\0'))
        {
            szUrlPath[0] = _T('/');
            szUrlPath[1] = _T('\0');
        }

        return LoadHTTP(link, fReadFile, szHostName, szUrlPath, iRedirectCount + 1);
    }

    // Update the HTTP status code
    link.SetStatusCode(iStatusCode);

    // If the status code is not 2xx, it is an invalid link
    if(szQueryResult[0] != '2')
    {
        link.SetState(CLink::eInvalidHTTP);

        // Get the status text (best effort; the link is invalid either way)
        dwQueryLength = sizeof(szQueryResult);
        if(CWininet::HttpQueryInfoA(
                hHttpRequest,           // hHttpRequest
                HTTP_QUERY_STATUS_TEXT, // dwInfoLevel
                szQueryResult,          // lpvBuffer
                &dwQueryLength,         // lpdwBufferLength
                NULL))                  // lpdwIndex
        {
            link.SetStatusText(szQueryResult);
        }

        return FALSE;
    }

    // Now we have a valid http link
    link.SetState(CLink::eValidHTTP);

    // If we are not reading the file, we can return now
    if(!fReadFile)
    {
        return TRUE;
    }

    // Check the result content-type
    dwQueryLength = sizeof(szQueryResult);
    if(!CWininet::HttpQueryInfoA(
            hHttpRequest,           // hHttpRequest
            HTTP_QUERY_CONTENT_TYPE,// dwInfoLevel
            szQueryResult,          // lpvBuffer
            &dwQueryLength,         // lpdwBufferLength
            NULL))                  // lpdwIndex
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
        return WininetFailed(link);
    }

    // We only load the html text for parsing
    if(!_tcsstr(szQueryResult, _T("text/html")) )
    {
        return TRUE;
    }

    link.SetContentType(CLink::eText);

    CString strBuffer;
    TCHAR buf[nReadFileBufferSize_c];
    DWORD dwBytesRead = 0;

    // Load the text html in a loop until a read returns zero bytes
    do
    {
        // The buffer is appended as a NUL terminated string, so clear it
        // and never let a read fill the last element: that element stays
        // zero and terminates the chunk even when the read is full size.
        memset(buf, 0, sizeof(buf));
        dwBytesRead = 0;

        if(!CWininet::InternetReadFile(
                hHttpRequest,                   // hFile
                buf,                            // lpBuffer
                sizeof(buf) - sizeof(TCHAR),    // dwNumberOfBytesToRead
                &dwBytesRead))                  // lpNumberOfBytesRead
        {
            TRACE(_T("CLinkLoader::LoadHTTP() - InternetReadFile() failed."));
            return WininetFailed(link);
        }

        // Note: a NUL byte inside the data ends the appended chunk early
        strBuffer += buf;
    }
    while(dwBytesRead);

    // Set the InternetReadFile result in the link object
    link.SetData(strBuffer);

    return TRUE;

} // CLinkLoader::LoadHTTP
BOOL
CLinkLoader::WininetFailed(
    CLink& link
    )
/*++

Routine Description:

    Wininet failed clean up subroutine. Marks the link as
    CLink::eInvalidWininet, stores the calling thread's last error code
    as the link status code and, when a message for that code can be
    formatted from the wininet module or the system message table,
    stores it as the link status text.

    Call this right after the failing wininet API so that the last
    error value still belongs to that API.

Arguments:

    link - reference to the result link object

Return Value:

    BOOL - Always returns FALSE, so callers can write
           "return WininetFailed(link);" on their failure paths.

--*/
{
    // Capture the error code before anything else: the calls below may
    // overwrite the thread's last-error value.
    const DWORD dwError = GetLastError();

    link.SetState(CLink::eInvalidWininet);
    link.SetStatusCode(dwError);

    TRACE(_T(" GetLastError() = %d\n"), dwError);

    // Look the message up in wininet.dll first, then in the system table.
    // No insert arguments are supplied, so inserts must be ignored.
    LPTSTR lpMsgBuf = NULL;

    if(FormatMessage(
            FORMAT_MESSAGE_ALLOCATE_BUFFER |
            FORMAT_MESSAGE_FROM_HMODULE |
            FORMAT_MESSAGE_FROM_SYSTEM |
            FORMAT_MESSAGE_IGNORE_INSERTS,
            CWininet::GetWininetModule(),
            dwError,
            MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
            (LPTSTR) &lpMsgBuf,     // receives a LocalAlloc'ed buffer
            0,
            NULL) > 0)
    {
        link.SetStatusText(lpMsgBuf);
        LocalFree(lpMsgBuf);
    }

    return FALSE;

} // CLinkLoader::WininetFailed