//+--------------------------------------------------------------------------- // // Copyright (C) 1996, Microsoft Corporation // // File: docsum.hxx // // Contents: document summary helper classes // // Classes: CDocCharacterization, CSummaryText // // History: 12-Jan-96 dlee Created // //---------------------------------------------------------------------------- #pragma once #include extern const GUID guidDocSummary; extern const GUID guidCharacterization; // this is the ole 2 / ms office summary guid its property ids #define defGuidDocSummary { 0xf29f85e0, \ 0x4ff9, 0x1068, \ 0xab, 0x91, 0x08, 0x00, \ 0x2b, 0x27, 0xb3, 0xd9 } const PROPID propidTitle = 2; const PROPID propidSubject = 3; const PROPID propidAuthor = 4; const PROPID propidKeywords = 5; const PROPID propidComments = 6; const PROPID propidTemplate = 7; const PROPID propidLastAuthor = 8; const PROPID propidRevNumber = 9; const PROPID propidAppName = 0x12; // guid and property ids used by the html filter #define defGuidHtmlInformation { 0x70eb7a10, \ 0x55d9, 0x11cf, \ 0xb7, 0x5b, 0x00, 0xaa, \ 0x00, 0x51, 0xfe, 0x20 } const PROPID PID_HEADING_1 = 3; const PROPID PID_HEADING_2 = 4; const PROPID PID_HEADING_3 = 5; const PROPID PID_HEADING_4 = 6; const PROPID PID_HEADING_5 = 7; const PROPID PID_HEADING_6 = 8; const unsigned propidCharacterization = 2; // constant used to separate parts of a characterization #define awcSummarySpace L". " const unsigned cwcSummarySpace = 2; // maximum amount of raw text used at once const ULONG cwcMaxRawUsed = 600; // These scores are just guidelines; any value can be used for a // summary utility. const unsigned scoreInfinity = 30000; const unsigned scoreHtmlDescription = 17000; const unsigned scoreTitle = 16000; const unsigned scoreAbstract = 15000; const unsigned scoreSubject = 14000; const unsigned scoreKeywords = 13000; const unsigned scoreComments = 12000; const unsigned scoreHeader1 = 10000; const unsigned scoreHeader2 = 9000; const unsigned scoreHeader3 = 8000; const unsigned scoreHeader4 = 7000; const unsigned scoreHeader5 = 6000; const unsigned scoreHeader6 = 5000; const unsigned scoreRawText = 4000; const unsigned scoreOtherProperty = 3000; const unsigned scoreIfNothingElse = 10; const unsigned scoreIgnore = 0; //+------------------------------------------------------------------------- // // Class: CSummaryText // // Purpose: Characterizations are built up with these objects // // History: 12-Jan-96 dlee Created // //-------------------------------------------------------------------------- class CSummaryText { public: CSummaryText( WCHAR * pwcText, unsigned cwc, unsigned utility ) : _pwcText( pwcText ), _cwcText( cwc ), _utility( utility ) {} CSummaryText() {} BOOL isSame( const WCHAR * pwc, unsigned cwc ) { return !wcsncmp( pwc, _pwcText, cwc ); } WCHAR * GetText() { return _pwcText; } void SetText( WCHAR * pwcText ) { _pwcText = pwcText; } unsigned GetUtility() { return _utility; } // methods needed by priority-queue template unsigned GetSize() { return _cwcText; } // keep the worst items at the top of the queue BOOL IsGreaterThan( CSummaryText & rOther ) { return _utility < rOther._utility; } private: WCHAR * _pwcText; unsigned _cwcText; unsigned _utility; }; //+------------------------------------------------------------------------- // // Class: CDocCharacterization // // Purpose: Builds characterizations // // History: 12-Jan-96 dlee Created // //-------------------------------------------------------------------------- class CDocCharacterization { public: CDocCharacterization( unsigned cwcAtMost ); ~CDocCharacterization(); void Add( CStorageVariant const & var, CFullPropSpec & ps ); void Add( const WCHAR * pwcSummary, unsigned cwcSummary, FULLPROPSPEC & ps ); void Get( WCHAR * awcSummary, unsigned & cwcSummary, BOOL fUseRawText ); BOOL HasCharacterization() { return _fIsGenerating; } private: void Ignore( const WCHAR * pwcIgnore, unsigned cwcText ); BOOL Add( const WCHAR * pwcSummary, unsigned cwcSummary, unsigned utility, BOOL fYankNoise = TRUE ); void AddRawText( const WCHAR * pwcRawText, unsigned cwcText ); BOOL AddCleanedString( const WCHAR * pwcSummary, unsigned cwcSummary, unsigned utility, BOOL fDeliniate ); void YankNoise( const WCHAR * pwcIn, WCHAR * pwcOut, unsigned & cwc ); void RemoveLowScoringItems( unsigned iLimit ); BOOL _fIsGenerating; unsigned _scoreRawText; TPriorityQueue _queue; enum { cwcMaxIgnoreBuf = 100 }; WCHAR _awcIgnoreBuf[ cwcMaxIgnoreBuf ]; unsigned _cwcIgnoreBuf; XArray _awcMetaDescription; BOOL _fMetaDescriptionAdded; };