/*++

   Copyright    (c)    1996    Microsoft Corporation

   Module  Name :

        linkload.cpp

   Abstract:

        Link loader class definitions. It uses wininet API to load the
        web page from the internet.

   Author:

        Michael Cheuk (mcheuk)        22-Nov-1996

   Project:

        Link Checker

   Revision History:

--*/

#include "stdafx.h"
#include "linkload.h"

#include "link.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

// Constants

// Redirect depth limit checked by LoadHTTP() before following a 301/302
const int iMaxRedirectCount_c = 3;

// Size (in TCHARs) of the chunk buffer used when reading the response body
const UINT nReadFileBufferSize_c = 4096;

// Size (in TCHARs) of the buffer used for HttpQueryInfo() results
const UINT nQueryResultBufferSize_c = 1024;


BOOL
CLinkLoader::Create(
    const CString& strUserAgent,
    const CString& strAdditonalHeaders
    )
/*++

Routine Description:

    One time link loader create function. Remembers the additional
    headers and opens the wininet internet session used by all
    subsequent loads.

Arguments:

    strUserAgent        - HTTP user agent name
    strAdditonalHeaders - additional HTTP headers

Return Value:

    BOOL - TRUE if success. FALSE otherwise.

--*/
{
    // Make sure wininet.dll is loaded
    ASSERT(CWininet::IsLoaded());
    if(!CWininet::IsLoaded())
    {
        return FALSE;
    }

    // Save the additional header
    m_strAdditionalHeaders = strAdditonalHeaders;

    // Open an internet session
    m_hInternetSession = CWininet::InternetOpenA(
                            strUserAgent,
                            PRE_CONFIG_INTERNET_ACCESS,
                            NULL,
                            INTERNET_INVALID_PORT_NUMBER,
                            0);

#ifdef _DEBUG
    if(!m_hInternetSession)
    {
        TRACE(_T("CLinkLoader::Create() - InternetOpen() failed. GetLastError() = %d\n"),
            GetLastError());
    }
#endif

    return (m_hInternetSession != NULL);

} // CLinkLoader::Create


BOOL
CLinkLoader::ChangeProperties(
    const CString& strUserAgent,
    const CString& strAdditionalHeaders
    )
/*++

Routine Description:

    Change the loader properties. This only works on a loader that
    already owns an internet session: the existing session is closed
    and Create() is called again with the new values.

Arguments:

    strUserAgent         - HTTP user agent name
    strAdditionalHeaders - additional HTTP headers

Return Value:

    BOOL - TRUE if success. FALSE otherwise (including when there is
           no existing session to replace).

--*/
{
    if(m_hInternetSession)
    {
        // Close the previous internet session and
        // call Create() again
        VERIFY(CWininet::InternetCloseHandle(m_hInternetSession));

        return Create(strUserAgent, strAdditionalHeaders);
    }

    return FALSE;

} // CLinkLoader::ChangeProperties


BOOL
CLinkLoader::Load(
    CLink& link,
    BOOL fReadFile
    )
/*++

Routine Description:

    Load a web link. The link URL is cracked into host name and URL
    path, then dispatched to LoadHTTP() for http:// links or to
    LoadURL() for the other schemes in the FTP..HTTPS range.

Arguments:

    link      - reference to the result link object
    fReadFile - read the file and save it in the link object

Return Value:

    BOOL - TRUE if success. FALSE otherwise.

--*/
{
    // Make sure we have a session available
    ASSERT(m_hInternetSession);
    if(!m_hInternetSession)
    {
        return FALSE;
    }

    // Crack the URL
    TCHAR szHostName[INTERNET_MAX_HOST_NAME_LENGTH];
    TCHAR szUrlPath[INTERNET_MAX_URL_LENGTH];

    URL_COMPONENTS urlcomp;
    memset(&urlcomp, 0, sizeof(urlcomp));
    urlcomp.dwStructSize = sizeof(urlcomp);

    urlcomp.lpszHostName = szHostName;
    urlcomp.dwHostNameLength = INTERNET_MAX_HOST_NAME_LENGTH;

    urlcomp.lpszUrlPath = szUrlPath;
    urlcomp.dwUrlPathLength = INTERNET_MAX_URL_LENGTH;

    if(!CWininet::InternetCrackUrlA(link.GetURL(), link.GetURL().GetLength(), NULL, &urlcomp))
    {
        TRACE(_T("CLinkLoader::Load() - InternetCrackUrl() failed. GetLastError() = %d\n"),
            GetLastError());
        return FALSE;
    }

    // Make sure we have a valid (non zero length) URL path.
    // An empty path (e.g. "http://hostname") is requested as "/".
    // The buffer is written directly: the previous code called
    // _tprintf(), which prints to stdout and never touched szUrlPath.
    if(_tcslen(szUrlPath) == 0)
    {
        szUrlPath[0] = _T('/');
        szUrlPath[1] = _T('\0');
    }

    // Call the appropriate load function for different URL schemes
    if(urlcomp.nScheme == INTERNET_SCHEME_HTTP)
    {
        return LoadHTTP(link, fReadFile, szHostName, szUrlPath);
    }
    else if(urlcomp.nScheme >= INTERNET_SCHEME_FTP &&
            urlcomp.nScheme <= INTERNET_SCHEME_HTTPS)
    {
        return LoadURL(link);
    }
    else
    {
        TRACE(_T("CLinkLoader::Load() - unsupport URL scheme(%d)\n"), urlcomp.nScheme);
        link.SetState(CLink::eUnsupport);
        return FALSE;
    }

} // CLinkLoader::Load


BOOL
CLinkLoader::LoadURL(
    CLink& link
    )
/*++

Routine Description:

    Load a URL (non-HTTP) link. The link is considered valid as soon
    as InternetOpenUrl() succeeds; no data is read.

Arguments:

    link - reference to the result link object

Return Value:

    BOOL - TRUE if success. FALSE otherwise.

--*/
{
    // Use InternetOpenUrl for all URL scheme except HTTP
    CAutoInternetHandle hOpenURL;
    hOpenURL = CWininet::InternetOpenUrlA(
                    m_hInternetSession,
                    link.GetURL(),
                    NULL,
                    0,
                    INTERNET_FLAG_DONT_CACHE,
                    0);

    if(!hOpenURL)
    {
        TRACE(_T("CLinkLoader::LoadURL() - InternetOpenUrlA() failed."));
        return WininetFailed(link);
    }

    link.SetState(CLink::eValidURL);
    return TRUE;

} // CLinkLoader::LoadURL


BOOL
CLinkLoader::LoadHTTP(
    CLink& link,
    BOOL fReadFile,
    LPCTSTR lpszHostName,
    LPCTSTR lpszUrlPath,
    int iRedirectCount /* = 0 */
    )
/*++

Routine Description:

    Load a HTTP link. Sends a GET request (auto redirect disabled),
    follows 301/302 responses itself by recursing with the Location
    header, records the status code / text in the link object and,
    when requested, reads a text/html body into the link object.

Arguments:

    link           - reference to the result link object
    fReadFile      - read the file and save it in the link object
    lpszHostName   - hostname
    lpszUrlPath    - URL path
    iRedirectCount - Looping count. It is used to keep track of the
                     number of redirections for the current link.

Return Value:

    BOOL - TRUE if success. FALSE otherwise.

--*/
{
    // Open an http session.
    // NOTE: the port is always INTERNET_INVALID_PORT_NUMBER; a port
    // given in the original URL is not forwarded by the callers.
    CAutoInternetHandle hHttpSession;
    hHttpSession = CWininet::InternetConnectA(
                        m_hInternetSession,             // hInternetSession
                        lpszHostName,                   // lpszServerName
                        INTERNET_INVALID_PORT_NUMBER,   // nServerPort
                        _T(""),                         // lpszUsername
                        _T(""),                         // lpszPassword
                        INTERNET_SERVICE_HTTP,          // dwService
                        0,                              // dwFlags
                        0);                             // dwContext

    if(!hHttpSession)
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - InternetConnect() failed."));
        return WininetFailed(link);
    }

    // Open an http request
    CAutoInternetHandle hHttpRequest;
    hHttpRequest = CWininet::HttpOpenRequestA(
                        hHttpSession,                   // hHttpSession
                        _T("GET"),                      // lpszVerb
                        lpszUrlPath,                    // lpszObjectName
                        HTTP_VERSION,                   // lpszVersion
                        link.GetBase(),                 // lpszReferer
                        NULL,                           // lpszAcceptTypes
                        INTERNET_FLAG_NO_AUTO_REDIRECT | INTERNET_FLAG_DONT_CACHE, // dwFlags
                        0);                             // dwContext

    if(!hHttpRequest)
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpOpenRequest() failed."));
        return WininetFailed(link);
    }

    // Send the http request
    if(!CWininet::HttpSendRequestA(
            hHttpRequest,               // hHttpRequest
            m_strAdditionalHeaders,     // lpszHeaders
            (DWORD)-1,                  // dwHeadersLength
            0,                          // lpOptional
            0))                         // dwOptionalLength
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpSendRequest() failed."));
        return WininetFailed(link);
    }

    TCHAR szQueryResult[nQueryResultBufferSize_c];
    DWORD dwQueryLength = sizeof(szQueryResult);

    // Check the result status code
    if(!CWininet::HttpQueryInfoA(
            hHttpRequest,               // hHttpRequest
            HTTP_QUERY_STATUS_CODE,     // dwInfoLevel
            szQueryResult,              // lpvBuffer
            &dwQueryLength,             // lpdwBufferLength
            NULL))                      // lpdwIndex
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
        return WininetFailed(link);
    }

    // Check for 301 Move Permanently or 302 Move Temporarily
    if(_ttoi(szQueryResult) == 301 || _ttoi(szQueryResult) == 302)
    {
        // We can only redirect iMaxRedirectCount_c times
        if(iRedirectCount > iMaxRedirectCount_c)
        {
            return FALSE;
        }

        // Get the new location
        dwQueryLength = sizeof(szQueryResult);

        if(!CWininet::HttpQueryInfoA(
                hHttpRequest,           // hHttpRequest
                HTTP_QUERY_LOCATION,    // dwInfoLevel
                szQueryResult,          // lpvBuffer
                &dwQueryLength,         // lpdwBufferLength
                NULL))                  // lpdwIndex
        {
            TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
            return WininetFailed(link);
        }

        // We only update the URL in link object if
        // we are redirecting from http://hostname/xyz to http://hostname/xyz/
        if(link.GetURL().GetLength() + 1 == (int)dwQueryLength &&
           link.GetURL().GetAt(link.GetURL().GetLength() - 1) != _TCHAR('/') &&
           szQueryResult[dwQueryLength - 1] == _TCHAR('/'))
        {
            link.SetURL(szQueryResult);
        }

        // Crack the URL & call LoadHTTP again
        TCHAR szHostName[INTERNET_MAX_HOST_NAME_LENGTH];
        TCHAR szUrlPath[INTERNET_MAX_URL_LENGTH];

        // Crack the URL
        URL_COMPONENTS urlcomp;
        memset(&urlcomp, 0, sizeof(urlcomp));
        urlcomp.dwStructSize = sizeof(urlcomp);

        urlcomp.lpszHostName = szHostName;
        urlcomp.dwHostNameLength = INTERNET_MAX_HOST_NAME_LENGTH;

        urlcomp.lpszUrlPath = szUrlPath;
        urlcomp.dwUrlPathLength = INTERNET_MAX_URL_LENGTH;

        // The Location value comes from the server and may not be a
        // crackable URL. Fail the link instead of recursing with
        // uninitialized host / path buffers (VERIFY alone does not
        // stop a release build).
        if(!CWininet::InternetCrackUrlA(szQueryResult, dwQueryLength, NULL, &urlcomp))
        {
            TRACE(_T("CLinkLoader::LoadHTTP() - InternetCrackUrl() failed."));
            return WininetFailed(link);
        }

        return LoadHTTP(link, fReadFile, szHostName, szUrlPath, ++iRedirectCount);
    }

    // Update the HTTP status code
    link.SetStatusCode(_ttoi(szQueryResult));

    // If the status code is not 2xx. it is a invalid link
    if(szQueryResult[0] != '2')
    {
        link.SetState(CLink::eInvalidHTTP);

        // Get the status text; it is optional, so a failed query
        // simply leaves the link's status text untouched
        dwQueryLength = sizeof(szQueryResult);

        if(CWininet::HttpQueryInfoA(
                hHttpRequest,           // hHttpRequest
                HTTP_QUERY_STATUS_TEXT, // dwInfoLevel
                szQueryResult,          // lpvBuffer
                &dwQueryLength,         // lpdwBufferLength
                NULL))                  // lpdwIndex
        {
            link.SetStatusText(szQueryResult);
        }

        return FALSE;
    }

    // Now we have a valid http link
    link.SetState(CLink::eValidHTTP);

    // If we are not reading the file, we can return now
    if(!fReadFile)
    {
        return TRUE;
    }

    // Check the result content-type
    dwQueryLength = sizeof(szQueryResult);

    if(!CWininet::HttpQueryInfoA(
            hHttpRequest,               // hHttpRequest
            HTTP_QUERY_CONTENT_TYPE,    // dwInfoLevel
            szQueryResult,              // lpvBuffer
            &dwQueryLength,             // lpdwBufferLength
            NULL))                      // lpdwIndex
    {
        TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
        return WininetFailed(link);
    }

    // We only load the html text for parsing
    if(!_tcsstr(szQueryResult, _T("text/html")) )
    {
        return TRUE;
    }

    link.SetContentType(CLink::eText);

    CString strBuffer;
    TCHAR buf[nReadFileBufferSize_c];
    DWORD dwBytesRead = 0;

    // Load the text html in a loop
    do
    {
        // The buffer is zeroed and one character less than its size
        // is requested, so the chunk is always NUL terminated when it
        // is appended to the CString, even if the read fills it.
        memset(buf, 0, sizeof(buf));

        if(CWininet::InternetReadFile(
                hHttpRequest,                   // hFile
                buf,                            // lpBuffer
                sizeof(buf) - sizeof(TCHAR),    // dwNumberOfBytesToRead
                &dwBytesRead))                  // lpNumberOfBytesRead
        {
            strBuffer += buf;
        }
        else
        {
            TRACE(_T("CLinkLoader::LoadHTTP() - InternetReadFile() failed."));
            return WininetFailed(link);
        }
    }
    while(dwBytesRead);

    // Set the InternetReadFile result in the link object
    link.SetData(strBuffer);

    return TRUE;

} // CLinkLoader::LoadHTTP


BOOL
CLinkLoader::WininetFailed(
    CLink& link
    )
/*++

Routine Description:

    Wininet failed clean up subroutine. Marks the link as
    eInvalidWininet and stores the last error code, plus its message
    text when one can be formatted, in the link object.

    Must be called right after the failing wininet call so that
    GetLastError() still refers to that failure.

Arguments:

    link - reference to the result link object

Return Value:

    BOOL - Always returns FALSE, so callers can write
           "return WininetFailed(link);"

--*/
{
    // Capture the error code first: any later call may overwrite
    // the thread's last error value
    const DWORD dwLastError = GetLastError();

    link.SetState(CLink::eInvalidWininet);
    link.SetStatusCode(dwLastError);

    TRACE(_T(" GetLastError() = %d\n"), link.GetStatusCode());

    // Look the message up in wininet.dll first, then the system table.
    // No insert arguments are supplied, so inserts must be ignored.
    LPTSTR lpMsgBuf = NULL;

    if(FormatMessage(
            FORMAT_MESSAGE_ALLOCATE_BUFFER |
            FORMAT_MESSAGE_FROM_HMODULE |
            FORMAT_MESSAGE_FROM_SYSTEM |
            FORMAT_MESSAGE_IGNORE_INSERTS,
            CWininet::GetWininetModule(),
            dwLastError,
            MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
            (LPTSTR) &lpMsgBuf,
            0,
            NULL) > 0)
    {
        link.SetStatusText(lpMsgBuf);
        LocalFree(lpMsgBuf);
    }

    return FALSE;

} // CLinkLoader::WininetFailed