528 lines
11 KiB
C++
528 lines
11 KiB
C++
|
/*++
|
||
|
|
||
|
Copyright (c) 1996 Microsoft Corporation
|
||
|
|
||
|
Module Name :
|
||
|
|
||
|
linkload.cpp
|
||
|
|
||
|
Abstract:
|
||
|
|
||
|
Link loader class definitions. It uses wininet API
|
||
|
to load the web page from the internet.
|
||
|
|
||
|
Author:
|
||
|
|
||
|
Michael Cheuk (mcheuk) 22-Nov-1996
|
||
|
|
||
|
Project:
|
||
|
|
||
|
Link Checker
|
||
|
|
||
|
Revision History:
|
||
|
|
||
|
--*/
|
||
|
|
||
|
#include "stdafx.h"
|
||
|
#include "linkload.h"
|
||
|
|
||
|
#include "link.h"
|
||
|
|
||
|
#ifdef _DEBUG
|
||
|
#define new DEBUG_NEW
|
||
|
#undef THIS_FILE
|
||
|
static char THIS_FILE[] = __FILE__;
|
||
|
#endif
|
||
|
|
||
|
// Constants
|
||
|
const int iMaxRedirectCount_c = 3;
|
||
|
const UINT nReadFileBufferSize_c = 4096;
|
||
|
const UINT nQueryResultBufferSize_c = 1024;
|
||
|
|
||
|
|
||
|
BOOL
CLinkLoader::Create(
    const CString& strUserAgent,
    const CString& strAdditonalHeaders
    )
/*++

Routine Description:

    One time link loader create function. It remembers the additional
    HTTP headers and opens the wininet internet session kept in
    m_hInternetSession.

Arguments:

    strUserAgent        - HTTP user agent name
    strAdditonalHeaders - additional HTTP headers

Return Value:

    BOOL - TRUE if the internet session was opened. FALSE if wininet.dll
           is not loaded or InternetOpen() failed.

--*/
{
    // Nothing below can work unless wininet.dll has been loaded
    if(!CWininet::IsLoaded())
    {
        ASSERT(FALSE);
        return FALSE;
    }

    // Keep the additional headers for the HTTP requests sent later
    m_strAdditionalHeaders = strAdditonalHeaders;

    // Open the internet session
    m_hInternetSession = CWininet::InternetOpenA(
                            strUserAgent,
                            PRE_CONFIG_INTERNET_ACCESS,
                            NULL,
                            INTERNET_INVALID_PORT_NUMBER,
                            0);

#ifdef _DEBUG
    // Debug builds report why the session could not be opened
    if(!m_hInternetSession)
    {
        TRACE(_T("CLinkLoader::Create() - InternetOpen() failed. GetLastError() = %d\n"),
            GetLastError());
    }
#endif

    return (m_hInternetSession != NULL);

} // CLinkLoader::Create
|
||
|
|
||
|
|
||
|
BOOL
CLinkLoader::ChangeProperties(
    const CString& strUserAgent,
    const CString& strAdditionalHeaders
    )
/*++

Routine Description:

    Change the loader properties by closing the current internet
    session and creating a new one with the given settings.

Arguments:

    strUserAgent         - HTTP user agent name
    strAdditionalHeaders - additional HTTP headers

Return Value:

    BOOL - The result of Create() when a session existed.
           FALSE if there was no session to change.

--*/
{
    // Without an existing session there is nothing to change
    if(!m_hInternetSession)
    {
        return FALSE;
    }

    // Close the previous internet session ...
    // NOTE(review): if Create() returns before reopening the session,
    // m_hInternetSession still refers to the handle closed here - confirm
    // whether that needs resetting.
    VERIFY(CWininet::InternetCloseHandle(m_hInternetSession));

    // ... and build a fresh one through Create()
    return Create(strUserAgent, strAdditionalHeaders);

} // CLinkLoader::ChangeProperties
|
||
|
|
||
|
|
||
|
BOOL
CLinkLoader::Load(
    CLink& link,
    BOOL fReadFile
    )
/*++

Routine Description:

    Load a web link. The link URL is cracked into host name and URL
    path, then the load function matching the URL scheme is called:
    LoadHTTP() for http, LoadURL() for the other schemes in the range
    INTERNET_SCHEME_FTP .. INTERNET_SCHEME_HTTPS. Any other scheme marks
    the link as CLink::eUnsupport.

Arguments:

    link      - reference to the result link object
    fReadFile - read the file and save it in the link object

Return Value:

    BOOL - TRUE if success. FALSE otherwise (no session, the URL could
           not be cracked, unsupported scheme, or the load failed).

--*/
{
    // Make sure we have a session available
    ASSERT(m_hInternetSession);
    if(!m_hInternetSession)
    {
        return FALSE;
    }

    // Crack the URL
    TCHAR szHostName[INTERNET_MAX_HOST_NAME_LENGTH];
    TCHAR szUrlPath[INTERNET_MAX_URL_LENGTH];
    URL_COMPONENTS urlcomp;

    // Start from empty strings so both buffers are well defined even
    // when the URL lacks the corresponding component
    szHostName[0] = _T('\0');
    szUrlPath[0] = _T('\0');

    memset(&urlcomp, 0, sizeof(urlcomp));
    urlcomp.dwStructSize = sizeof(urlcomp);

    urlcomp.lpszHostName = szHostName;
    urlcomp.dwHostNameLength = INTERNET_MAX_HOST_NAME_LENGTH;

    urlcomp.lpszUrlPath = szUrlPath;
    urlcomp.dwUrlPathLength = INTERNET_MAX_URL_LENGTH;

    if(!CWininet::InternetCrackUrlA(link.GetURL(), link.GetURL().GetLength(), 0, &urlcomp))
    {
        TRACE(_T("CLinkLoader::Load() - InternetCrackUrl() failed. GetLastError() = %d\n"),
            GetLastError());
        return FALSE;
    }

    // Make sure we have a valid (non zero length) URL path: an empty
    // path is replaced by "/". (The previous code called _tprintf()
    // here, which wrote to stdout and left the path empty.)
    if(szUrlPath[0] == _T('\0'))
    {
        szUrlPath[0] = _T('/');
        szUrlPath[1] = _T('\0');
    }

    // Call the appropriate load function for different URL schemes
    if(urlcomp.nScheme == INTERNET_SCHEME_HTTP)
    {
        return LoadHTTP(link, fReadFile, szHostName, szUrlPath);
    }
    else if(urlcomp.nScheme >= INTERNET_SCHEME_FTP &&
            urlcomp.nScheme <= INTERNET_SCHEME_HTTPS)
    {
        return LoadURL(link);
    }
    else
    {
        TRACE(_T("CLinkLoader::Load() - unsupport URL scheme(%d)\n"), urlcomp.nScheme);
        link.SetState(CLink::eUnsupport);
        return FALSE;
    }

} // CLinkLoader::Load
|
||
|
|
||
|
|
||
|
BOOL
CLinkLoader::LoadURL(
    CLink& link
    )
/*++

Routine Description:

    Load a URL (non-HTTP) link. The link is considered valid as soon
    as InternetOpenUrl() succeeds on it; no data is read.

Arguments:

    link - reference to the result link object

Return Value:

    BOOL - TRUE if the URL could be opened (link state is set to
           CLink::eValidURL). Otherwise the result of WininetFailed().

--*/
{
    // Every URL scheme except HTTP goes through InternetOpenUrl
    CAutoInternetHandle hOpenURL;
    hOpenURL = CWininet::InternetOpenUrlA(
                    m_hInternetSession,         // hInternetSession
                    link.GetURL(),              // lpszUrl
                    NULL,                       // lpszHeaders
                    0,                          // dwHeadersLength
                    INTERNET_FLAG_DONT_CACHE,   // dwFlags
                    0);                         // dwContext

    // Failure: let WininetFailed() record the error in the link
    if(!hOpenURL)
    {
        TRACE(_T("CLinkLoader::LoadURL() - InternetOpenUrlA() failed."));
        return WininetFailed(link);
    }

    // Success: being able to open the URL is all that is checked
    link.SetState(CLink::eValidURL);
    return TRUE;

} // CLinkLoader::LoadURL
|
||
|
|
||
|
|
||
|
BOOL
CLinkLoader::LoadHTTP(
    CLink& link,
    BOOL fReadFile,
    LPCTSTR lpszHostName,
    LPCTSTR lpszUrlPath,
    int iRedirectCount /* = 0 */
    )
/*++

Routine Description:

    Load a HTTP link. A GET request is sent with automatic redirection
    turned off. A 301/302 answer is followed by calling this function
    again with the host name and URL path taken from the Location
    header. Any other answer updates the status code in the link
    object; a non 2xx status marks the link as CLink::eInvalidHTTP.
    When fReadFile is set and the content type contains "text/html",
    the response body is read and stored in the link object.

Arguments:

    link           - reference to the result link object
    fReadFile      - read the file and save it in the link object
    lpszHostName   - hostname
    lpszUrlPath    - URL path
    iRedirectCount - Looping count. It is used to keep track of the
                     number of redirections already followed for the
                     current link.

Return Value:

    BOOL - TRUE if success. FALSE otherwise (wininet failure, non 2xx
           status, or more than iMaxRedirectCount_c redirections).

--*/
{
    // Open an http session
    CAutoInternetHandle hHttpSession;
    hHttpSession = CWininet::InternetConnectA(
                        m_hInternetSession,             // hInternetSession
                        lpszHostName,                   // lpszServerName
                        INTERNET_INVALID_PORT_NUMBER,   // nServerPort
                        _T(""),                         // lpszUsername
                        _T(""),                         // lpszPassword
                        INTERNET_SERVICE_HTTP,          // dwService
                        0,                              // dwFlags
                        0);                             // dwContext

    if(!hHttpSession)
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - InternetConnect() failed."));
        return WininetFailed(link);
    }

    // Open an http request
    CAutoInternetHandle hHttpRequest;
    hHttpRequest = CWininet::HttpOpenRequestA(
                        hHttpSession,       // hHttpSession
                        _T("GET"),          // lpszVerb
                        lpszUrlPath,        // lpszObjectName
                        HTTP_VERSION,       // lpszVersion
                        link.GetBase(),     // lpszReferer
                        NULL,               // lpszAcceptTypes
                        INTERNET_FLAG_NO_AUTO_REDIRECT | INTERNET_FLAG_DONT_CACHE, // dwFlags
                        0);                 // dwContext

    if(!hHttpRequest)
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpOpenRequest() failed."));
        return WininetFailed(link);
    }

    // Send the http request
    if(!CWininet::HttpSendRequestA(
            hHttpRequest,           // hHttpRequest
            m_strAdditionalHeaders, // lpszHeaders
            (DWORD)-1,              // dwHeadersLength
            0,                      // lpOptional
            0))                     // dwOptionalLength
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpSendRequest() failed."));
        return WininetFailed(link);
    }

    TCHAR szQueryResult[nQueryResultBufferSize_c];
    DWORD dwQueryLength = sizeof(szQueryResult);

    // Check the result status code
    if(!CWininet::HttpQueryInfoA(
            hHttpRequest,           // hHttpRequest
            HTTP_QUERY_STATUS_CODE, // dwInfoLevel
            szQueryResult,          // lpvBuffer
            &dwQueryLength,         // lpdwBufferLength
            NULL))                  // lpdwIndex
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
        return WininetFailed(link);
    }

    // Parse the status code once; it is needed for the redirect test
    // and for the link object
    const int iStatusCode = _ttoi(szQueryResult);

    // Check for 301 Move Permanently or 302 Move Temporarily
    if(iStatusCode == 301 || iStatusCode == 302)
    {
        // We can only redirect iMaxRedirectCount_c times.
        // iRedirectCount is the number of redirections already
        // followed, so stop once it reaches the limit.
        // NOTE(review): the link state is left untouched on this
        // path - confirm callers cope with that.
        if(iRedirectCount >= iMaxRedirectCount_c)
        {
            return FALSE;
        }

        // Get the new location
        dwQueryLength = sizeof(szQueryResult);

        if(!CWininet::HttpQueryInfoA(
                hHttpRequest,           // hHttpRequest
                HTTP_QUERY_LOCATION,    // dwInfoLevel
                szQueryResult,          // lpvBuffer
                &dwQueryLength,         // lpdwBufferLength
                NULL))                  // lpdwIndex
        {
            TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
            return WininetFailed(link);
        }

        // We only update the URL in link object if
        // we are redirecting from http://hostname/xyz to http://hostname/xyz/
        // (detected as: one character longer, and only the new
        // location ends with '/')
        if(link.GetURL().GetLength() + 1 == (int)dwQueryLength &&
           link.GetURL().GetAt(link.GetURL().GetLength() - 1) != _TCHAR('/') &&
           szQueryResult[dwQueryLength - 1] == _TCHAR('/'))
        {
            link.SetURL(szQueryResult);
        }

        // Crack the new location & call LoadHTTP again
        TCHAR szHostName[INTERNET_MAX_HOST_NAME_LENGTH];
        TCHAR szUrlPath[INTERNET_MAX_URL_LENGTH];
        URL_COMPONENTS urlcomp;

        szHostName[0] = _T('\0');
        szUrlPath[0] = _T('\0');

        memset(&urlcomp, 0, sizeof(urlcomp));
        urlcomp.dwStructSize = sizeof(urlcomp);

        urlcomp.lpszHostName = szHostName;
        urlcomp.dwHostNameLength = INTERNET_MAX_HOST_NAME_LENGTH;

        urlcomp.lpszUrlPath = szUrlPath;
        urlcomp.dwUrlPathLength = INTERNET_MAX_URL_LENGTH;

        // The location comes from the server and may not be a URL
        // that can be cracked. This must be checked in release builds
        // too, otherwise the recursion would run on unusable
        // host name / URL path buffers.
        if(!CWininet::InternetCrackUrlA(szQueryResult, dwQueryLength, 0, &urlcomp))
        {
            TRACE(_T("CLinkLoader::LoadHTTP() - InternetCrackUrl() failed."));
            return WininetFailed(link);
        }

        return LoadHTTP(link, fReadFile, szHostName, szUrlPath, iRedirectCount + 1);
    }

    // Update the HTTP status code
    link.SetStatusCode(iStatusCode);

    // If the status code is not 2xx, it is an invalid link
    if(szQueryResult[0] != '2')
    {
        link.SetState(CLink::eInvalidHTTP);

        // Get the status text; it is optional, so a failure here is
        // simply ignored
        dwQueryLength = sizeof(szQueryResult);

        if(CWininet::HttpQueryInfoA(
                hHttpRequest,           // hHttpRequest
                HTTP_QUERY_STATUS_TEXT, // dwInfoLevel
                szQueryResult,          // lpvBuffer
                &dwQueryLength,         // lpdwBufferLength
                NULL))                  // lpdwIndex
        {
            link.SetStatusText(szQueryResult);
        }

        return FALSE;
    }

    // Now we have a valid http link
    link.SetState(CLink::eValidHTTP);

    // If we are not reading the file, we can return now
    if(!fReadFile)
    {
        return TRUE;
    }

    // Check the result content-type
    dwQueryLength = sizeof(szQueryResult);
    if(!CWininet::HttpQueryInfoA(
            hHttpRequest,           // hHttpRequest
            HTTP_QUERY_CONTENT_TYPE,// dwInfoLevel
            szQueryResult,          // lpvBuffer
            &dwQueryLength,         // lpdwBufferLength
            NULL))                  // lpdwIndex
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
        return WininetFailed(link);
    }

    // We only load the html text for parsing
    if(!_tcsstr(szQueryResult, _T("text/html")) )
    {
        return TRUE;
    }

    link.SetContentType(CLink::eText);

    CString strBuffer;
    TCHAR buf[nReadFileBufferSize_c];
    DWORD dwBytesRead = 0;

    // Load the text html in a loop
    do
    {
        // The buffer is zeroed and the read stops one TCHAR short of
        // its end, so the data is always null terminated when it is
        // appended as a string below
        memset(buf, 0, sizeof(buf));

        if(!CWininet::InternetReadFile(
                hHttpRequest,                   // hFile
                buf,                            // lpBuffer
                sizeof(buf) - sizeof(TCHAR),    // dwNumberOfBytesToRead
                &dwBytesRead))                  // lpNumberOfBytesRead
        {
            TRACE(_T("CLinkLoader::LoadHTTP() - InternetReadFile() failed."));
            return WininetFailed(link);
        }

        strBuffer += buf;
    }
    while(dwBytesRead);

    // Set the InternetReadFile result in the link object
    link.SetData(strBuffer);

    return TRUE;

} // CLinkLoader::LoadHTTP
|
||
|
|
||
|
|
||
|
|
||
|
BOOL
CLinkLoader::WininetFailed(
    CLink& link
    )
/*++

Routine Description:

    Wininet failed clean up subroutine. It marks the link as
    CLink::eInvalidWininet, stores the last error code as the link
    status code and, when a message text can be found for that code,
    stores it as the link status text.

Arguments:

    link - reference to the result link object

Return Value:

    BOOL - Always returns FALSE, so a failing caller can simply
           "return WininetFailed(link);"

--*/
{
    // Capture the error code before anything else: the calls below
    // may overwrite the thread's last error value
    const DWORD dwError = GetLastError();

    link.SetState(CLink::eInvalidWininet);
    link.SetStatusCode(dwError);
    TRACE(_T(" GetLastError() = %d\n"), link.GetStatusCode());

    LPTSTR lpMsgBuf = NULL;

    // Look the message up in the wininet module and in the system
    // message table. No insert arguments are supplied, so
    // FORMAT_MESSAGE_IGNORE_INSERTS keeps FormatMessage from trying to
    // expand insert sequences found in the message text.
    if(FormatMessage(
            FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_HMODULE |
                FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
            CWininet::GetWininetModule(),
            dwError,
            MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
            (LPTSTR) &lpMsgBuf,     // receives the allocated buffer
            0,
            NULL) > 0)
    {
        link.SetStatusText(lpMsgBuf);

        // The buffer was allocated by FormatMessage on our behalf
        LocalFree(lpMsgBuf);
    }

    return FALSE;

} // CLinkLoader::WininetFailed
|
||
|
|