/*++

   Copyright    (c)    1996    Microsoft Corporation

   Module  Name :

        linkpars.cpp

   Abstract:

        Link parser class implementation. This class is responsible for
        parsing the html file for hyperlink.

   Author:

        Michael Cheuk (mcheuk)

   Project:

        Link Checker

   Revision History:

--*/

#include "stdafx.h"
#include "LinkPars.h"

#include "link.h"
#include "lcmgr.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

// Constants
const CString strLocalHost_c(_T("localhost"));

void 
CLinkParser::Parse(
    const CString& strData, 
    const CString& strBaseURL, 
    CLinkPtrList& rLinkPtrList
    )
/*++

Routine Description:

    Parse a page of html data. Every "<...>" span is handed to
    ParsePossibleTag; for each span it accepts, CreateURL is called and,
    if that succeeds, the link is added to rLinkPtrList.

Arguments:

    strData      - page of html
    strBaseURL   - base URL (passed to CreateURL and AddLink)
    rLinkPtrList - reference to links list. The new links will be added
                   to this list.

Return Value:

    N/A

--*/
{
    // strData is const for the whole call, so this pointer stays valid
    LPCTSTR lpszData = strData;

    // Look for the first '<'
    LPCTSTR lpszOpen = _tcschr(lpszData, _TUCHAR('<'));

    while(lpszOpen != NULL)
    {
        // Look for the '>'
        LPCTSTR lpszClose = _tcschr(lpszOpen, _TUCHAR('>'));

        if(lpszClose == NULL)
        {
            // No '>' after this '<' means there is none after any later
            // '<' either. Stop now instead of rescanning the tail of the
            // buffer once per remaining '<'.
            break;
        }

        // The possible tag must be longer than 7 characters (a href=)
        int iCount = static_cast<int>(lpszClose - lpszOpen) - 1; // skip the '<'

        if( iCount > 7 )
        {
            int iIndex = static_cast<int>(lpszOpen - lpszData) + 1; // skip the '<'
            CString strPossibleURL(strData.Mid(iIndex, iCount));

            // Parse the possible tag. On success strPossibleURL is passed
            // on to CreateURL and AddLink.
            if(ParsePossibleTag(strPossibleURL))
            {
                CString strURL;
                BOOL fLocalLink = FALSE;

                // We found a valid tag. Time to create new link.
                if( CreateURL(strPossibleURL, strBaseURL, strURL, fLocalLink) )
                {
                    rLinkPtrList.AddLink(strURL, strBaseURL, strPossibleURL, fLocalLink);
                }
            }
        }

        // Look for the next '<'. The search resumes one character past
        // the current '<', not past the '>'.
        lpszOpen = _tcschr(lpszOpen + 1, _TUCHAR('<'));
    }

} // CLinkParser::Parse


BOOL 
CLinkParser::ParsePossibleTag(
    CString& strTag
    )
/*++

Routine Description:

    Parse a single "<.....>" for possible hyperlink

Arguments:

    strTag - value inside a "<.....>" excluding '<' & '>'
             If this is a hyperlink tag, the hyperlink URL
             will be put in strTag.

Return Value:

    BOOL - TRUE if hyperlink tag. FALSE otherwise.

--*/ { // Make a working copy CString strWorkCopy(strTag); // Let's work with lower case strWorkCopy.MakeLower(); // // Check for, // // HyperLink: // // // // // CGI // // // Style Sheet // // if( strWorkCopy[0] == _T('a') || strWorkCopy.Find(_T("link")) == 0 ) { return GetTagValue(strTag, CString(_T("href"))); } // // Check for, // // // // Table: // //
// // else if( strWorkCopy.Find(_T("body")) == 0 || strWorkCopy.Find(_T("table")) == 0 || strWorkCopy.Find(_T("th")) == 0 || strWorkCopy.Find(_T("td")) == 0 ) { return GetTagValue(strTag, CString(_T("background"))); } // // Check for, // // Sound: // // // // Frame: // // // Netscape embeded: // // // JavaScript & VB Script //