//+--------------------------------------------------------------------------- // // Microsoft Windows // Copyright (C) Microsoft Corporation, 1994 // // File: persist.cxx // // Contents: Implementation of Office9 Thicket Save API // //---------------------------------------------------------------------------- #include "priv.h" #include #include #include // fake out mimeole.h's dll linkage directives for our delay load stuff in dllload.c #define _MIMEOLE_ #define DEFINE_STRCONST #include #include "resource.h" #include "packager.h" #include "reload.h" #include #define DEFINE_STRING_CONSTANTS #pragma warning( disable : 4207 ) #include "htmlstr.h" #pragma warning( default : 4207 ) const GUID CLSID_IMimeInternational = {0xfd853cd9, 0x7f86, 0x11d0, {0x82, 0x52, 0x0, 0xc0, 0x4f, 0xd8, 0x5a, 0xb4}}; const GUID IID_IMimeInternational = {0xc5588349, 0x7f86, 0x11d0, {0x82, 0x52, 0x0, 0xc0, 0x4f, 0xd8, 0x5a, 0xb4}}; const GUID IID_IMimeBody = {0xc558834c, 0x7f86, 0x11d0, {0x82, 0x52, 0x0, 0xc0, 0x4f, 0xd8, 0x5a, 0xb4}}; // Trident legacy defines... 
#define RRETURN(hr) return hr; #define ReleaseInterface(punk) { if (punk) punk->Release(); punk = NULL; } // Local prototypes void RemoveBookMark(WCHAR *pwzURL, WCHAR **ppwzBookMark); void RestoreBookMark(WCHAR *pwzBookMark); HRESULT HrGetElement(IHTMLDocument2 *pDoc, LPCSTR pszName, IHTMLElement **ppElem); HRESULT HrGetBodyElement(IHTMLDocument2 *pDoc, IHTMLBodyElement **ppBody); HRESULT HrSetMember(LPUNKNOWN pUnk, BSTR bstrMember, BSTR bstrValue); HRESULT HrGetCollectionOf(IHTMLDocument2 *pDoc, BSTR bstrTagName, IHTMLElementCollection **ppCollect); HRESULT HrGetCollectionItem(IHTMLElementCollection *pCollect, ULONG uIndex, REFIID riid, LPVOID *ppvObj); ULONG UlGetCollectionCount(IHTMLElementCollection *pCollect); HRESULT HrGetMember(LPUNKNOWN pUnk, BSTR bstrMember,LONG lFlags, BSTR *pbstr); HRESULT HrLPSZToBSTR(LPCSTR lpsz, BSTR *pbstr); HRESULT HrBSTRToLPSZ(BSTR bstr, LPSTR *lplpsz); HRESULT HrGetCombinedURL( IHTMLElementCollection *pCollBase, LONG cBase, LONG lElemPos, BSTR bstrRelURL, BSTR bstrDocURL, BSTR *pbstrBaseURL); class CHashEntry { public: CHashEntry(void) : m_bstrKey(NULL), m_bstrValue(NULL), m_pheNext(NULL) {}; ~CHashEntry(void) { if (m_bstrKey) SysFreeString(m_bstrKey); if (m_bstrValue) SysFreeString(m_bstrValue); } BOOL SetKey(BSTR bstrKey) { ASSERT(m_bstrKey==NULL); m_bstrKey = SysAllocString(bstrKey); return m_bstrKey != NULL; } BOOL SetValue(BSTR bstrValue) { ASSERT(m_bstrValue==NULL || !StrCmpIW(m_bstrValue, c_bstr_BLANK) || !StrCmpIW(m_bstrValue, bstrValue)); m_bstrValue = SysAllocString(bstrValue); return m_bstrValue != NULL; } BSTR m_bstrKey; BSTR m_bstrValue; CHashEntry *m_pheNext; }; class CWebArchive { public: CWebArchive(CThicketProgress* ptp=NULL); ~CWebArchive(void); virtual HRESULT Init( LPCTSTR lpstrDoc, DWORD dwHashSize ); virtual HRESULT AddURL( BSTR bstrURL, CHashEntry **pphe ) = 0; virtual HRESULT AddFrameOrStyleEntry( BSTR bstrURL, CHashEntry **pphe, LPTSTR lpstrFrameDoc ) = 0; virtual HRESULT Find(BSTR bstrF, CHashEntry 
**pphe); virtual HRESULT Commit(void); virtual HRESULT Revert(void); virtual HRESULT ArchiveDocumentText(IHTMLDocument2 *pDoc, UINT cpDoc, BOOL fFrameDoc) = 0; virtual HRESULT ArchiveCSSText( BSTR bstrCSSUrl, LPCSTR lpszSSText, LPCTSTR lpszStyleDoc ) = 0; protected: LPTSTR m_lpstrDoc; // Desintation file for thicket document LPTSTR m_lpstrSafeDoc; // Temp name of original file, which we delete on Commit() CThicketProgress* m_ptp; enum ThURLType { thurlMisc, thurlHttp, thurlFile }; ThURLType _GetURLType( BSTR bstrURL ); HRESULT _BackupOldFile(void); // hash table stuff stolen from MIMEEDIT HRESULT _Insert(BSTR bstrI, BSTR bstrThicket, CHashEntry **pphe); inline DWORD Hash(LPWSTR psz); DWORD m_cBins; CHashEntry *m_rgBins; }; class CThicketArchive : public CWebArchive { public: CThicketArchive(CThicketProgress* ptp=NULL); ~CThicketArchive(void); virtual HRESULT Init( LPCTSTR lpstrDoc, DWORD dwHashSize ); virtual HRESULT AddURL( BSTR bstrURL, CHashEntry **pphe ); virtual HRESULT AddFrameOrStyleEntry( BSTR bstrURL, CHashEntry **pphe, LPTSTR lpstrFrameDoc ); virtual HRESULT Commit(void); virtual HRESULT Revert(void); virtual HRESULT ArchiveDocumentText(IHTMLDocument2 *pDoc, UINT cpDoc, BOOL fFrameDoc); virtual HRESULT ArchiveCSSText( BSTR bstrCSSUrl, LPCSTR lpszSSText, LPCTSTR lpszStyleDoc ); protected: LPTSTR m_lpstrFilesDir; // directory for document's supporting files. LPTSTR m_lpstrFilesDirName; // suffix of m_lpstrFilesDir LPTSTR m_lpstrSafeDir; // Temp name of original files directory, which we delete on Commit() BOOL m_fFilesDir; // TRUE if m_lpstrFilesDir has been created. 
HRESULT _ApplyMarkOfTheWeb( IHTMLDocument2 *pDoc, LPSTREAM pstm, BOOL fUnicode ); HRESULT _AddHttpEntry( BSTR bstrURL, CHashEntry **pphe, LPTSTR lpstrDstFile, LPTSTR lpstrSrcFile=NULL ); HRESULT _AddFileEntry( BSTR bstrURL, CHashEntry **pphe, LPTSTR lpstrDstFile, LPTSTR lpstrSrcFile=NULL ); HRESULT _AddMiscEntry( BSTR bstrURL, CHashEntry **pphe, LPTSTR lpstrDstFile, int cchDstFile ); HRESULT _PersistHttpURL( BSTR bstrURL, CHashEntry **pphe ); HRESULT _PersistFileURL( BSTR bstrURL, CHashEntry **pphe ); HRESULT _PersistMiscURL( BSTR bstrURL, CHashEntry **pphe ); HRESULT _BackupOldDirectory(void); HRESULT _RemoveOldDirectoryAndChildren( LPCWSTR pszDir ); HRESULT _Insert(BSTR bstrI, LPTSTR lpszFile, int cchFile, CHashEntry **pphe); }; class CMHTMLArchive : public CWebArchive { public: CMHTMLArchive(CThicketProgress* ptp=NULL); ~CMHTMLArchive(void); virtual HRESULT Init( LPCTSTR lpstrDoc, DWORD dwHashSize ); virtual HRESULT AddURL( BSTR bstrURL, CHashEntry **pphe ); virtual HRESULT AddFrameOrStyleEntry( BSTR bstrURL, CHashEntry **pphe, LPTSTR lpstrFrameDoc ); virtual HRESULT ArchiveDocumentText(IHTMLDocument2 *pDoc, UINT cpDoc, BOOL fFrameDoc); virtual HRESULT ArchiveCSSText( BSTR bstrCSSUrl, LPCSTR lpszSSText, LPCTSTR lpszStyleDoc ); virtual HRESULT SetCharset(UINT uiCharset, CSETAPPLYTYPE csat, IMimeBody *pBody); protected: HBODY m_hBodyAlt; IMimeMessage *m_pimm; }; /* * The following classes implement extended Save As MTHML functionality. * Access to the extended functionality is controlled by new MECD_ flags * defined in mimeole.h. Clients of the C API in this module should notice * mimimal change in its behavior. ( limited to the additional inclusion * table and table cell background images ). 
*
 * The root idea is that of a collection packager, which takes a subset
 * of the document.all collection, filters the elements of that subcollection,
 * and marshals the element data into the MIMEOle document. This is patterned
 * after the existing PackageImageData routine, and relies heavily on
 * HrAddImageToMessage, which is much more general than its name implies.
 *
 *
 * Stylesheets introduce some repetition, as the stylesheet OM is similar,
 * but not similar enough, to support common base classes specialized via
 * templates.
 *
 * The process of adding new packagers is pretty straight-forward.
 * [1] (a) if the packaged attribute is a complete URL, derive from CCollectionPackager
 *     (b) if the attribute is a relative URL, derive from CRelativeURLPackager
 * [2] Implement InitFromCollection. Have it call _InitSubCollection() with the tag name.
 *     See CImagePackager::InitFromCollection() as a simple example.
 * [3] Implement _GetTargetAttribute() to return the attribute you want to package.
 *     You may want to add the string constants for [2] and [3] to htmlstr.h
 * [4] Define an MECD_ control flag, if the thing you're packaging is new.
 * [5] Add a local var of your packager type to CDocumentPackager::PackageDocument.
 * [6] Follow the pattern of the other packagers in CDocumentPackager::PackageDocument
 *
 * For elements with multiple persisted attributes, it's dealer's choice as to how
 * to approach it. Write separate, simpler packagers for each attribute or write
 * one packager that deals with all of the target element's attributes.
 */

/*
 * CCollectionPackager - abstract base class for HTML element packagers.
 *      Implements subsampling from the all collection, iteration over the
 *      collection, and basic packaging functionality.
 *
 *      Derived classes must implement InitFromCollection and _GetTargetAttribute.
 *      InitFromCollection - derived class should store the desired subset of the
 *          input collection into the m_pColl data member.
_InitSubCollection is
 *          a useful method for this purpose.
 *      _GetTargetAttribute - derived class should return a BSTR naming the attribute
 *          of the element to be packaged.
 *
 */
class CCollectionPackager
{
public:
    virtual ~CCollectionPackager(void);

    // Implemented by each derived packager (pure virtual here).
    virtual HRESULT InitFromCollection(IHTMLElementCollection *pColl,
                                       ULONG *pcElems = NULL) = 0;

    // Packages the stored collection (m_pColl) by handing it to _PackageData
    // together with the caller's cancel flag and progress range.
    virtual HRESULT PackageData(CWebArchive *pwa,
                                BOOL *pfCancel = NULL,
                                CThicketProgress *ptp = NULL,
                                ULONG progLow = 0,
                                ULONG progHigh = 100)
    {
        return _PackageData( pwa, m_pColl, pfCancel, ptp, progLow, progHigh );
    }

protected:
    // Only derived classes may construct a packager; it starts with no
    // collection and m_fAddCntLoc cleared.
    CCollectionPackager(void)
        : m_pColl(NULL),
          m_fAddCntLoc(FALSE)
    {
    }

    HRESULT _InitSubCollection(IHTMLElementCollection *pAll,
                               BSTR bstrTagName,
                               IHTMLElementCollection **ppSub,
                               ULONG *pcElems = NULL);

    // Pure virtual: see the class comment above for the contract.
    virtual BSTR _GetTargetAttribute(void) = 0;

    virtual HRESULT _GetElementURL(IHTMLElement *pElem, BSTR *pbstrURL);

    virtual HRESULT _PackageData(CWebArchive *pwa,
                                 IHTMLElementCollection *pColl,
                                 BOOL *pfCancel = NULL,
                                 CThicketProgress *ptp = NULL,
                                 ULONG progLow = 0,
                                 ULONG progHigh = 100);

    virtual HRESULT _PackageElement(CWebArchive *pwa, IHTMLElement *pElem);

    IHTMLElementCollection *m_pColl;        // collection handed to _PackageData by PackageData()
    BOOL                    m_fAddCntLoc;
};

/*
 * CImagePackager - packages the src's of IMG tags.
*/ class CImagePackager : public CCollectionPackager { public: CImagePackager(void) {}; virtual ~CImagePackager(void) {}; virtual HRESULT InitFromCollection(IHTMLElementCollection *pColl, ULONG *pcElems = NULL); protected: virtual BSTR _GetTargetAttribute(void); }; /* * CInputImgPackager - packages INPUT type="image" */ class CInputImgPackager : public CImagePackager { public: CInputImgPackager() {} virtual ~CInputImgPackager() {} virtual HRESULT InitFromCollection(IHTMLElementCollection *pColl, ULONG *pcElems = NULL); }; /* * CBGSoundsPackager - packages background sounds */ class CBGSoundsPackager : public CCollectionPackager { public: CBGSoundsPackager() {}; virtual ~CBGSoundsPackager() {}; virtual HRESULT InitFromCollection(IHTMLElementCollection *pColl, ULONG *pcElems = NULL); protected: virtual BSTR _GetTargetAttribute(void); }; /* * CAnchorAdjustor - modifies anchor hrefs. * * Makes them absolute if they point out of the collection. */ class CAnchorAdjustor : public CCollectionPackager { public: CAnchorAdjustor(void) {}; virtual ~CAnchorAdjustor(void) {}; virtual HRESULT InitFromCollection(IHTMLElementCollection *pColl, ULONG *pcElems = NULL); protected: virtual BSTR _GetTargetAttribute(void); virtual HRESULT _PackageElement(CWebArchive *pwa, IHTMLElement *pElem); }; /* * CAreaAdjustor - modifies AREA hrefs. * * Makes them absolute if they point out of the collection. Same filter * as the anchor adjustor, but different tag. */ class CAreaAdjustor : public CAnchorAdjustor { public: CAreaAdjustor(void) {}; virtual ~CAreaAdjustor(void) {}; virtual HRESULT InitFromCollection(IHTMLElementCollection *pColl, ULONG *pcElems = NULL); }; /* * CBaseNeutralizer - resets any and all tags to the d. * * No actual packaging goes on here, but we do remap the * href. 
*/ class CBaseNeutralizer : public CCollectionPackager { public: CBaseNeutralizer(void) : m_bstrLocal(NULL), m_pTree(NULL) {}; virtual ~CBaseNeutralizer(void); virtual HRESULT InitFromCollection(IHTMLElementCollection *pColl, ULONG *pcElems = NULL ) { return InitFromCollection( pColl, pcElems, NULL ); }; HRESULT InitFromCollection(IHTMLElementCollection *pColl, ULONG *pcElems = NULL, IHTMLDocument2 *pDoc = NULL); virtual HRESULT PackageData(CWebArchive *pwa, BOOL *pfCancel = NULL, CThicketProgress *ptp = NULL, ULONG progLow = 0, ULONG progHigh = 100); protected: virtual BSTR _GetTargetAttribute(void); virtual HRESULT _PackageElement(CWebArchive *pwa, IHTMLElement *pElem); BSTR m_bstrLocal; IMarkupServices *m_pTree; }; /* * CRelativeURLPackager - abstract base class for packagers * whose element's source attribute returns a relative URL. * This class implements triutils.pp's GetBackgroundImageUrl's * process of attempting to combine the (relative) element URL * with the nearest URL. If no is availaible, it * uses the document URL. * * This class is an abstract base because it does not implement * _GetTargetAttribute. It's implementation of InitFromCollection * isn't very useful and will probably be overridden by derived * classes. */ class CRelativeURLPackager : public CCollectionPackager { public: CRelativeURLPackager(void) : m_pCollBase(NULL), m_cBase(0), m_bstrDocURL(NULL) {}; virtual ~CRelativeURLPackager(void); virtual HRESULT InitFromCollection(IHTMLElementCollection *pColl, ULONG *pcElems = NULL) { return Init( pColl, pcElems, NULL ); } virtual HRESULT Init(IHTMLElementCollection *pColl, ULONG *pcElems, IHTMLDocument2 *pDoc); protected: virtual HRESULT _GetElementURL(IHTMLElement *pElem, BSTR *pbstrURL); IHTMLElementCollection *m_pCollBase; // collection of BASE tags used to complete URLs ULONG m_cBase; BSTR m_bstrDocURL; }; /* * CBackgroundPackager - packages the background of BODY, TABLE, TD, and TH. * * These three tags have a common target attribute. 
*/ class CBackgroundPackager : public CRelativeURLPackager { public: CBackgroundPackager(void) {}; ~CBackgroundPackager(void) {}; virtual HRESULT PackageData(CWebArchive *pwa, BOOL *pfCancel, CThicketProgress *ptp = NULL, ULONG progLow = 0, ULONG progHigh = 100); protected: virtual BSTR _GetTargetAttribute(void); }; /* * CDynSrcPackager - packages the dynsrc of IMG and INPUT. * * These two tags have a common target attribute. */ class CDynSrcPackager : public CRelativeURLPackager { public: CDynSrcPackager(void) {}; ~CDynSrcPackager(void) {}; virtual HRESULT PackageData(CWebArchive *pwa, BOOL *pfCancel, CThicketProgress *ptp = NULL, ULONG progLow = 0, ULONG progHigh = 100); protected: virtual BSTR _GetTargetAttribute(void); }; /* * CScriptPackager - packages the dynsrc of IMG and INPUT. * * These two tags have a common target attribute. */ class CScriptPackager : public CRelativeURLPackager { public: CScriptPackager(void) : m_pCollScripts(NULL) {}; ~CScriptPackager(void) { if (m_pCollScripts) m_pCollScripts->Release(); }; virtual HRESULT PackageData(CWebArchive *pwa, BOOL *pfCancel = NULL, CThicketProgress *ptp = NULL, ULONG progLow = 0, ULONG progHigh = 100) { return _PackageData( pwa, m_pCollScripts, pfCancel, ptp, progLow, progHigh ); } virtual HRESULT Init(IHTMLElementCollection *pColl, ULONG *pcElems = NULL, IHTMLDocument2 *pDoc = NULL); protected: virtual BSTR _GetTargetAttribute(void); IHTMLElementCollection *m_pCollScripts; }; /* * CFramesPackager - packages the and