windows-nt/Source/XPSP1/NT/inetsrv/query/apps/srch/brdoc.cxx

//+-------------------------------------------------------------------------
//
//  Microsoft Windows
//  Copyright (C) Microsoft Corporation, 1992 - 2000.
//
//  File:       document.cxx
//
//  Contents:   The Document part of the browser
//
//--------------------------------------------------------------------------

#include <pch.cxx>
#pragma hdrstop

// Alias used by Document::Init to refer to its pSearch parameter.
#define TheSearch pSearch

// U+2029 PARAGRAPH SEPARATOR.  Document::ReadFile writes it into the text
// buffer for end-of-paragraph / end-of-chapter chunk breaks, and
// Document::EatPara treats it as a paragraph terminator.
const int UNICODE_PARAGRAPH_SEPARATOR=0x2029;

// Property set GUID that Document::ReadFile compares against each chunk's
// attribute to pick out the storage "contents" property.
const GUID guidStorage = PSGUID_STORAGE;

//+-------------------------------------------------------------------------
//
//  Member:     Position::Compare, public
//
//  Synopsis:   Order two positions by paragraph, then by beginning offset
//
//  Arguments:  [pos] -- position to compare against
//
//  Returns:    The difference of the paragraph numbers when they differ,
//              otherwise the difference of the beginning offsets
//              (zero when both are equal)
//
//--------------------------------------------------------------------------

int Position::Compare( const Position& pos ) const
{
    int const paraDelta = _para - pos.Para();

    // Different paragraphs decide the ordering on their own
    if ( 0 != paraDelta )
        return paraDelta;

    // Same paragraph: fall back to the offset inside it
    int const offDelta = _begOff - pos.BegOff();
    return offDelta;
}

//+-------------------------------------------------------------------------
//
//  Member:     Hit::Hit, public
//
//  Synopsis:   Build a hit that owns a private copy of a position array
//
//  Arguments:  [aPos] -- positions to copy (copied bitwise with memcpy)
//              [cPos] -- number of entries in aPos
//
//--------------------------------------------------------------------------

Hit::Hit( const Position * aPos, unsigned cPos )
: _cPos(cPos)
{
    // Allocate our own array first, then duplicate the caller's data into it
    _aPos = new Position[cPos];

    size_t const cbPositions = sizeof(Position) * cPos;
    memcpy( _aPos, aPos, cbPositions );
}

//+-------------------------------------------------------------------------
//
//  Member:     Hit::~Hit, public
//
//  Synopsis:   Release the position array allocated by the constructor
//
//--------------------------------------------------------------------------

Hit::~Hit()
{
    // _aPos comes from "new Position[cPos]", so it needs the array form
    // of delete; plain "delete" on an array is undefined behavior.
    delete [] _aPos;
}

//+-------------------------------------------------------------------------
//
//  Member:     HitIter::GetPositionCount, public
//
//  Synopsis:   Number of positions in the hit the iterator refers to
//
//  Returns:    Count() of the current hit, or 0 when the iterator index
//              is not below the document's hit count or the hit slot is
//              empty
//
//--------------------------------------------------------------------------

int HitIter::GetPositionCount() const
{
    // Nothing to count without a valid, non-null hit under the iterator
    if ( _iHit >= _pDoc->_cHit || !_pDoc->_aHit[_iHit] )
        return 0;

    return _pDoc->_aHit[_iHit]->Count();
}

//+-------------------------------------------------------------------------
//
//  Member:     HitIter::GetPosition, public
//
//  Synopsis:   Fetch, by value, one position of the current hit
//
//  Arguments:  [i] -- index of the position within the hit
//
//  Returns:    GetPos(i) of the current hit, or a default-constructed
//              Position when the iterator index is not below the
//              document's hit count or the hit slot is empty
//
//--------------------------------------------------------------------------

Position HitIter::GetPosition ( int i ) const
{
    if ( _iHit >= _pDoc->_cHit || !_pDoc->_aHit[_iHit] )
    {
        // No usable hit: hand back a default position
        Position posDefault;
        return posDefault;
    }

    return _pDoc->_aHit[_iHit]->GetPos(i);
}

//+-------------------------------------------------------------------------
//
//  Member:     Document::Document, public
//
//  Synopsis:   Create an uninitialized document bound to a file name
//
//  Arguments:  [filename] -- path of the file; a private copy is kept
//              [rank]     -- rank value stored with the document
//              [fDelete]  -- if TRUE, Free() deletes the file on disk
//
//--------------------------------------------------------------------------

Document::Document(WCHAR const* filename, LONG rank, BOOL fDelete)
    : _filename(0),
      _rank (rank),
      _buffer(0),
      _bufLen(0),
      _bufEnd(0),
      _pFilter(0),
      _aParaOffset(0),
      _isInit(FALSE),
      _cHit(0),
      _aParaLine(0),
      _maxParaLen(0),
      _cPara(0),
      _chunkCount(0),
      _fDelete( fDelete )
{
    // Duplicate the path, including its terminating null
    size_t const cwcWithNull = wcslen( filename ) + 1;

    _filename = new WCHAR[ cwcWithNull ];
    memcpy( _filename, filename, cwcWithNull * sizeof(WCHAR) );
}

//+-------------------------------------------------------------------------
//
//  Member:     Document::Document, public
//
//  Synopsis:   Create an empty, uninitialized document with no file name
//
//  Notes:      Every member set by the file-name constructor is set here
//              too, including _rank, which was previously left
//              uninitialized by this constructor.
//
//--------------------------------------------------------------------------

Document::Document()
: _filename(0),
  _rank(0),
  _buffer(0),
  _bufLen(0),
  _bufEnd(0),
  _pFilter(0),
  _aParaOffset(0),
  _isInit(FALSE),
  _cHit(0),
  _aParaLine(0),
  _maxParaLen(0),
  _cPara(0),
  _chunkCount(0),
  _fDelete( FALSE )
{}

//+-------------------------------------------------------------------------
//
//  Member:     Document::~Document, public
//
//  Synopsis:   Destroy the document; all cleanup is delegated to Free()
//
//--------------------------------------------------------------------------

Document::~Document()
{
    this->Free();
}

//+-------------------------------------------------------------------------
//
//  Member:     Document::Free, public
//
//  Synopsis:   Free document storage
//
//--------------------------------------------------------------------------

void Document::Free()
{
    if ( 0 != _filename )
    {
        if ( _fDelete )
            DeleteFile( _filename );

        delete [] _filename;
    }

    if (!_isInit)
        return;

    for ( unsigned i = 0; i < _cHit; i++ )
    {
        delete _aHit[i];
        _aHit[i] = 0;
    }

    // _aHit is embedded

    delete []_aParaOffset;
    _aParaOffset = 0;

    if (_aParaLine)
    {
        for (int i = 0; i < _cPara; i++)
        {
            while (_aParaLine[i].next != 0)
            {
                ParaLine* p = _aParaLine[i].next;
                _aParaLine[i].next = _aParaLine[i].next->next;
                delete p;
            }
        }
        delete _aParaLine;
    }

    delete _buffer;

    _buffer = 0;

    _bufEnd = 0;
    _cHit = 0;

    _isInit = FALSE;
} //Free

//+-------------------------------------------------------------------------
//
//  Member:     Document::Init, public
//
//  Synopsis:   Read-in file, fill array of hits
//
//  Arguments:  [pSearch] -- query-hits object; initialized here with the
//                           document's filter and then asked for hits
//
//  Returns:    S_OK when the document ended up initialized, otherwise
//              the last status code recorded during initialization
//
//  Notes:      The text is read through the filter, split into
//              paragraphs and lines, then the filter is re-initialized
//              and handed to pSearch.  QUERY_E_ALLNOISE from pSearch is
//              not fatal: the file is still shown, just without hits.
//              Exceptions thrown while reading are caught and turned
//              into a status code.  The filter is always released before
//              returning.
//
//--------------------------------------------------------------------------

SCODE Document::Init(ISearchQueryHits *pSearch)
{
    BOOL noHits = FALSE;

    SCODE sc = S_OK;

    TRY
    {
        AllocBuffer( _filename );
        BindToFilter( _filename );

        ULONG ulFlags;
        sc = _pFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS |
                             IFILTER_INIT_CANON_HYPHENS |
                             IFILTER_INIT_APPLY_INDEX_ATTRIBUTES,
                             0, 0, &ulFlags );

        if (FAILED (sc))
            THROW (CException(sc));

        ReadFile();

        BreakParas();

        if (Paras() != 0)
        {
            BreakLines();

#if 0
            // some filters don't behave correctly if you just re-init them,
            // so release the filter and re-open it.

            _pFilter->Release();
            _pFilter = 0;
            BindToFilter();
#endif

            // Rewind the filter so the search object can walk it again.
            // The result used to be overwritten unchecked by the call
            // below; a filter that failed to re-initialize must not be
            // handed to the search object.
            sc = _pFilter->Init ( IFILTER_INIT_CANON_PARAGRAPHS |
                                  IFILTER_INIT_CANON_HYPHENS |
                                  IFILTER_INIT_APPLY_INDEX_ATTRIBUTES,
                                  0, 0, &ulFlags );

            if (FAILED (sc))
                THROW (CException(sc));

            sc = pSearch->Init( _pFilter, ulFlags );

            if (FAILED (sc))
            {
                if ( QUERY_E_ALLNOISE != sc )
                    THROW (CException(sc));
                // we can still show the file

                sc = S_OK;
                noHits = TRUE;
            }

            // SUCCESS
            _isInit = TRUE;
        }
    }
    CATCH ( CException, e )
    {
        _isInit = FALSE;
        sc = e.GetErrorCode();
    }
    END_CATCH;

    if ( noHits )
    {
        _cHit = 0;
        _isInit = (_bufEnd - _buffer) != 0;
    }
    else if ( _isInit )
    {
        //
        // pull up all the hits -- only when initialization succeeded, so
        // a search object that was never initialized is not queried
        //

        ULONG count;
        FILTERREGION* aRegion;

        // Separate status variable: the enumeration result must not
        // disturb the initialization status returned below.
        SCODE scHit = pSearch->NextHitOffset ( &count, &aRegion );

        while (scHit == S_OK)
        {
            XCoMem<FILTERREGION> xRegion( aRegion );

            CDynArrayInPlace<Position> aPos( count );

            for (unsigned i = 0; i < count; i++)
                aPos [i] = RegionToPos ( aRegion [i] );

            xRegion.Free();

            XPtr<Hit> xHit( new Hit( aPos.GetPointer(), count ) );

            // NOTE(review): _aHit is embedded in Document and its
            // capacity is not visible here -- confirm that the number of
            // hits cannot exceed it.
            _aHit[_cHit] = xHit.Get();
            _cHit++;
            xHit.Acquire();

            scHit = pSearch->NextHitOffset ( &count, &aRegion );
        }
    }

    if ( _pFilter )
    {
        _pFilter->Release();
        _pFilter = 0;
    }

    return _isInit ? S_OK : sc;
}

//+-------------------------------------------------------------------------
//
//  Member:     Document::RegionToPos
//
//  Synopsis:   Translate a filter region (chunk id + start + extent) into
//              a paragraph-relative Position
//
//  Arguments:  [region] -- region reported for a hit
//
//  Returns:    Position( paragraph, offset within paragraph, extent ), or
//              a default-constructed Position when the chunk id is not in
//              the chunk table or no paragraph offset lies beyond the
//              region's buffer offset
//
//  Notes:      The chunk and paragraph indexes found by one call are
//              kept in function-level statics and tried first by the
//              next call; a stale hint is detected and the search
//              restarts from zero.
//
//--------------------------------------------------------------------------

Position Document::RegionToPos ( FILTERREGION& region )
{
    static int s_iPara = 0;
    static int s_iChunk = 0;
    static Position s_posNone;

    // Locate the chunk: reuse the remembered index when it still matches,
    // otherwise scan the table (chunk ids are stored in ascending order)
    BOOL const fChunkHintGood = s_iChunk < _chunkCount &&
                                _chunk[s_iChunk].ChunkId() == region.idChunk;

    if ( !fChunkHintGood )
    {
        for ( s_iChunk = 0;
              s_iChunk < _chunkCount && _chunk[s_iChunk].ChunkId() < region.idChunk;
              s_iChunk++ )
        {
        }

        if ( s_iChunk >= _chunkCount || _chunk[s_iChunk].ChunkId() != region.idChunk )
            return s_posNone;
    }

    Win4Assert ( s_iChunk < _chunkCount );
    Win4Assert ( _chunk[s_iChunk].ChunkId() == region.idChunk );

    // Offset of the region's first character within the text buffer
    ULONG const offset = _chunk[s_iChunk].Offset() + region.cwcStart;

    // The paragraph hint is only usable if it does not start past the offset
    if ( s_iPara >= _cPara || _aParaOffset[s_iPara] > offset )
        s_iPara = 0;

    Win4Assert ( _aParaOffset[s_iPara] <= offset );

    // Walk forward to the first paragraph that begins beyond the offset;
    // the one before it contains the region.
    // _aParaOffset[_cPara] is valid!
    while ( s_iPara <= _cPara )
    {
        if ( _aParaOffset[s_iPara] > offset )
        {
            Win4Assert ( s_iPara > 0 );
            s_iPara--;

            return Position ( s_iPara,
                              offset - _aParaOffset[s_iPara],
                              region.cwcExtent );
        }

        s_iPara++;
    }

    return s_posNone;
}

//+-------------------------------------------------------------------------
//
//  Member:     Document::AllocBuffer, public
//
//  Synopsis:   Allocate buffer for file text
//
//  Arguments:  [pwcPath] -- path of the file whose size determines the
//                           buffer length
//
//  Notes:      Throws CException if the file cannot be opened or its
//              size cannot be obtained.  On return _bufLen is the usable
//              length in characters and _buffer[_bufLen] holds a
//              terminating null.
//
//--------------------------------------------------------------------------

void Document::AllocBuffer ( WCHAR const * pwcPath )
{
    //
    //  We should keep allocating buffers on demand,
    //  but for this simple demo we'll just get the
    //  file size up front and do a single buffer
    //  allocation of 2.25 times the size (to accommodate
    //  Unicode expansion). THIS IS JUST A DEMO!
    //

    HANDLE hFile = CreateFile ( pwcPath,
                               GENERIC_READ,
                               FILE_SHARE_READ,
                               0, // security
                               OPEN_EXISTING,
                               FILE_ATTRIBUTE_NORMAL,
                               0 ); // template

    if ( INVALID_HANDLE_VALUE == hFile )
        THROW( CException() );

    DWORD const cbFile = GetFileSize( hFile, 0 );

    // Capture the error before CloseHandle has a chance to change it
    DWORD const dwErr = ( INVALID_FILE_SIZE == cbFile ) ? GetLastError()
                                                        : NO_ERROR;
    CloseHandle ( hFile );

    // INVALID_FILE_SIZE is the failure value, not a length; sizing the
    // buffer from it would request a bogus allocation.
    if ( INVALID_FILE_SIZE == cbFile )
    {
        SCODE const scSize = ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr )
                                                   : E_FAIL;
        THROW( CException( scSize ) );
    }

    _bufLen = cbFile;

    // Unicode from ASCII, twice and then some

    _bufLen = 2 * _bufLen + _bufLen / 4 + 1;

    // One extra slot for the terminating null
    _buffer = new WCHAR [_bufLen + 1];
    _buffer[ _bufLen ] = 0;
}

// Signature of the "LoadTextFilter" entry point looked up in query.dll
typedef HRESULT (__stdcall * PFnLoadTextFilter)( WCHAR const * pwcPath,
                                                 IFilter ** ppIFilter );

// Cached entry point; resolved on the first call to MyLoadTextFilter
PFnLoadTextFilter g_pLoadTextFilter = 0;

//+-------------------------------------------------------------------------
//
//  Function:   MyLoadTextFilter
//
//  Synopsis:   Call query.dll's LoadTextFilter, resolving it on first use
//
//  Arguments:  [pwc]      -- path of the file to filter
//              [ppFilter] -- receives the filter
//
//  Returns:    The result of LoadTextFilter, or a failure code if the
//              entry point cannot be resolved.  Resolution failure never
//              yields a success code: when GetLastError reports 0 the
//              function returns E_FAIL, since HRESULT_FROM_WIN32(0) is
//              S_OK and the caller would then use an unset filter.
//
//--------------------------------------------------------------------------

SCODE MyLoadTextFilter( WCHAR const *pwc, IFilter **ppFilter )
{
    if ( 0 == g_pLoadTextFilter )
    {
        // Only ask for the export when the module is actually loaded
        HMODULE hQuery = GetModuleHandle( L"query.dll" );

        if ( 0 != hQuery )
            g_pLoadTextFilter = (PFnLoadTextFilter) GetProcAddress( hQuery, "LoadTextFilter" );

        if ( 0 == g_pLoadTextFilter )
        {
            DWORD const dwErr = GetLastError();

            return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
        }
    }

    return g_pLoadTextFilter( pwc, ppFilter );
}

//+-------------------------------------------------------------------------
//
//  Member:     Document::BindToFilter, public
//
//  Synopsis:   Obtain a filter for the file and store it in _pFilter
//
//  Arguments:  [pwcPath] -- path of the file to filter
//
//  Notes:      LoadIFilter is tried first; if it fails the text filter
//              from MyLoadTextFilter is used instead.  Throws CException
//              with the second failure code if both fail.
//
//--------------------------------------------------------------------------

void Document::BindToFilter( WCHAR const * pwcPath )
{
    SCODE scBind = LoadIFilter( pwcPath, 0, (void **)&_pFilter );

    if ( SUCCEEDED(scBind) )
        return;

    // No registered filter could be loaded: fall back to the text filter
    scBind = MyLoadTextFilter( pwcPath, &_pFilter );

    if ( FAILED(scBind) )
        THROW( CException(scBind) );
}

//+-------------------------------------------------------------------------
//
//  Member:     Document::ReadFile, public
//
//  Synopsis:   Read file into buffer using the filter
//
//--------------------------------------------------------------------------

void Document::ReadFile ()
{
    SCODE sc;
    ULONG lenSoFar = 0;
    int   cChunk = 0;
    BOOL  fSeenProp = FALSE;

    STAT_CHUNK statChunk;
    sc = _pFilter->GetChunk ( &statChunk );

    // what about all these glueing flags?
    // Take them into account at some point
    // to test more complicated chunking

    while (SUCCEEDED(sc)
          || FILTER_E_LINK_UNAVAILABLE == sc
          || FILTER_E_EMBEDDING_UNAVAILABLE == sc )
    {

        if ( SUCCEEDED( sc ) && (statChunk.flags & CHUNK_TEXT) )
        {
            // read the contents only

            if ( statChunk.attribute.guidPropSet == guidStorage &&
                 statChunk.attribute.psProperty.ulKind == PRSPEC_PROPID &&
                 statChunk.attribute.psProperty.propid == PID_STG_CONTENTS )
            {
                if ( statChunk.breakType != CHUNK_NO_BREAK )
                {
                    switch( statChunk.breakType )
                    {
                        case CHUNK_EOW:
                        case CHUNK_EOS:
                            _buffer[lenSoFar++] = L' ';
                            break;
                        case CHUNK_EOP:
                        case CHUNK_EOC:
                            _buffer[lenSoFar++] = UNICODE_PARAGRAPH_SEPARATOR;
                            break;
                    }
                }

                _chunk [cChunk].SetChunkId (statChunk.idChunk);
                Win4Assert ( cChunk == 0 || statChunk.idChunk > _chunk [cChunk - 1].ChunkId () );
                _chunk [cChunk].SetOffset (lenSoFar);
                cChunk++;

                do
                {
                    ULONG lenThis = _bufLen - lenSoFar;
                    if (lenThis == 0)
                        break;

                    sc = _pFilter->GetText( &lenThis, _buffer+lenSoFar );

                    // The buffer may be filled with zeroes.  Nice filter.

                    if ( SUCCEEDED(sc) && 0 != lenThis )
                    {
                        lenThis = __min( lenThis,
                                         wcslen( _buffer + lenSoFar ) );
                        lenSoFar += lenThis;
                    }
                }
                while (SUCCEEDED(sc));
            }
        } // if SUCCEEDED( sc )

        // next chunk, please
        sc = _pFilter->GetChunk ( &statChunk );
    }

    _bufEnd = _buffer + lenSoFar;

    Win4Assert( lenSoFar <= _bufLen );

    _chunkCount = cChunk;
}


//+-------------------------------------------------------------------------
//
//  Member:     Document::BreakParas, public
//
//  Synopsis:   Break document into paragraphs separated by line feeds
//
//--------------------------------------------------------------------------

#define PARAS 25

void Document::BreakParas()
{
    int maxParas = PARAS;
    _aParaOffset = new unsigned [ maxParas ];
    WCHAR * pCur = _buffer;
    _cPara = 0;
    _maxParaLen = 0;

    do
    {
        if ( _cPara == maxParas )
        {
            // grow array
            unsigned * tmp = new unsigned [maxParas * 2];
            for ( int n = 0; n < maxParas; n++ )
                tmp[n] = _aParaOffset[n];
            delete []_aParaOffset;
            _aParaOffset = tmp;
            maxParas *= 2;
        }
        _aParaOffset [_cPara] = (UINT)(pCur - _buffer);

        pCur = EatPara(pCur);

        _cPara++;

    } while ( pCur < _bufEnd );

    // store end of buffer offset as _aParaOffset[_cPara]

    if ( _cPara == maxParas )
    {
        // grow array
        unsigned * tmp = new unsigned [maxParas + 1];
        for ( int n = 0; n < maxParas; n++ )
            tmp[n] = _aParaOffset[n];
        delete []_aParaOffset;
        _aParaOffset = tmp;
        maxParas += 1;
    }

    _aParaOffset [_cPara] = (UINT)(pCur - _buffer - 1);
}

//+-------------------------------------------------------------------------
//
//  Member:     Document::EatPara, private
//
//  Synopsis:   Skip one paragraph and its terminator
//
//  Arguments:  [pCur] -- first character of the paragraph
//
//  Returns:    Pointer just past the terminator: one character beyond
//              where the scan stopped, or two for a CR/LF pair.  The scan
//              stops at line feed, carriage return, null, the Unicode
//              paragraph separator, or _bufEnd.
//
//  Notes:      Raises _maxParaLen when this paragraph (terminator
//              excluded) is longer than any seen so far.
//
//--------------------------------------------------------------------------

WCHAR * Document::EatPara( WCHAR * pCur )
{
    WCHAR * const pwcStart = pCur;

    // Advance to the terminator or the end of the text
    while ( pCur < _bufEnd )
    {
        int const ch = *pCur;

        if ( L'\n' == ch ||
             L'\r' == ch ||
             L'\0' == ch ||
             UNICODE_PARAGRAPH_SEPARATOR == ch )
        {
            break;
        }

        pCur++;
    }

    int const cwcPara = (int)(pCur - pwcStart);

    // Step over the terminator; a carriage return directly followed by a
    // line feed counts as one terminator
    pCur++;

    if ( pCur < _bufEnd && L'\r' == *(pCur-1) && L'\n' == *pCur )
        pCur++;

    if ( cwcPara > _maxParaLen )
        _maxParaLen = cwcPara;

    return pCur;
}

//+-------------------------------------------------------------------------
//
//  Function:   BreakLine
//
//  Synopsis:   Decide how many characters of a text run go on one line
//
//  Arguments:  [buf]    -- start of the remaining text
//              [cwcBuf] -- characters remaining in buf
//              [cwcMax] -- preferred maximum line length (must be > 0)
//
//  Returns:    cwcBuf when the text fits in cwcMax.  Otherwise the length
//              up to and including the last space or tab within the
//              first cwcMax characters; if there is none, the index of
//              the first space or tab at or after cwcMax, or cwcBuf if
//              the rest contains no space or tab.
//
//--------------------------------------------------------------------------

int BreakLine ( WCHAR* buf, int cwcBuf, int cwcMax )
{
    if ( cwcBuf <= cwcMax )
        return cwcBuf;

    Win4Assert (cwcMax > 0);

    // Back up from the limit until the line ends in whitespace
    int cwcLine = cwcMax;

    while ( cwcLine >= 1 &&
            L' ' != buf[cwcLine-1] &&
            L'\t' != buf[cwcLine-1] )
    {
        cwcLine--;
    }

    if ( 0 != cwcLine )
        return cwcLine;

    // a single word larger than screen width
    // try scanning forward
    for ( cwcLine = cwcMax; cwcLine < cwcBuf; cwcLine++ )
    {
        if ( L' ' == buf[cwcLine] || L'\t' == buf[cwcLine] )
            break;
    }

    return cwcLine;
}

const int MAX_LINE_LEN = 110;

void Document::BreakLines()
{
    _aParaLine = new ParaLine [_cPara];
    for (int i = 0; i < _cPara; i++)
    {
        int cwcLeft = _aParaOffset[i+1] - _aParaOffset[i];

        if (cwcLeft < MAX_LINE_LEN)
            _aParaLine[i].offEnd = cwcLeft;
        else
        {
            ParaLine* pParaLine = &_aParaLine[i];
            WCHAR* buf = _buffer + _aParaOffset[i];
            int cwcOffset = 0;

            for (;;)
            {
                int cwcLine = BreakLine ( buf + cwcOffset, cwcLeft, MAX_LINE_LEN );
                cwcOffset += cwcLine;
                pParaLine->offEnd = cwcOffset;
                cwcLeft -= cwcLine;
                if (cwcLeft == 0)
                    break;
                pParaLine->next = new ParaLine;
                pParaLine = pParaLine->next;
            };
        }
    }
}

//+-------------------------------------------------------------------------
//
//  Member:     Document::GetLine, public
//
//  Synopsis:   Copy text from paragraph to buffer
//
//  Arguments:  [nPara] -- paragraph number
//              [off] -- offset within paragraph
//              [cwc] -- in/out chars to copy / copied
//              [buf] -- target buffer
//
//  Returns:    FALSE (cwc untouched) when nPara is not a valid paragraph
//              number or off is negative; TRUE otherwise.
//
//  Notes:      The count is clamped to the range [0, characters left in
//              the paragraph after off].  An offset past the end of the
//              paragraph or a negative requested count therefore copies
//              nothing and sets cwc to 0, instead of handing memcpy a
//              negative length.
//
//--------------------------------------------------------------------------


BOOL Document::GetLine(int nPara, int off, int& cwc, WCHAR* buf)
{
    Win4Assert (_buffer != 0);
    if (nPara < 0 || nPara >= _cPara || off < 0)
        return FALSE;

    const WCHAR * pText = _buffer + _aParaOffset[nPara] + off;

    // _aParaOffset [_cPara] is the offset of the end of buffer
    int cwcPara = _aParaOffset[nPara+1] - (_aParaOffset[nPara] + off);

    // off lies beyond the paragraph: nothing is available from there
    if (cwcPara < 0)
        cwcPara = 0;

    if (cwc < 0)
        cwc = 0;

    cwc = __min ( cwc, cwcPara );
    memcpy ( buf, pText, cwc * sizeof(WCHAR));
    return TRUE;
}

//+-------------------------------------------------------------------------
//
//  Member:     Document::GetWord, public
//
//  Synopsis:
//  Copy the string into buffer
//
//--------------------------------------------------------------------------

void Document::GetWord(int nPara, int offSrc, int cwcSrc, WCHAR* buf)
{
    Win4Assert (_buffer != 0);
    Win4Assert ( nPara < _cPara );

    WCHAR * p = _buffer + _aParaOffset[nPara];

    Win4Assert ( p + offSrc + cwcSrc <= _bufEnd );

    memcpy ( buf, p + offSrc, cwcSrc * sizeof(WCHAR));
}
Add source files 2020-09-26 03:20:57 -05:00			`//+-------------------------------------------------------------------------`
			`//`
			`// Microsoft Windows`
			`// Copyright (C) Microsoft Corporation, 1992 - 2000.`
			`//`
			`// File: document.cxx`
			`//`
			`// Contents: The Document part of the browser`
			`//`
			`//--------------------------------------------------------------------------`

			`#include <pch.cxx>`
			`#pragma hdrstop`

			`#define TheSearch pSearch`

			`const int UNICODE_PARAGRAPH_SEPARATOR=0x2029;`

			`const GUID guidStorage = PSGUID_STORAGE;`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Position::Compare, public`
			`//`
			`// Synopsis: Compare two positions`
			`//`
			`//--------------------------------------------------------------------------`

			`int Position::Compare( const Position& pos ) const`
			`{`
			`int diff = _para - pos.Para();`
			`if ( diff == 0 )`
			`diff = _begOff - pos.BegOff();`
			`return diff;`
			`}`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Hit::Hit, public`
			`//`
			`// Synopsis: Create hit from an array of positions`
			`//`
			`//--------------------------------------------------------------------------`

			`Hit::Hit( const Position * aPos, unsigned cPos )`
			`: _cPos(cPos)`
			`{`
			`_aPos = new Position[cPos];`

			`memcpy( _aPos, aPos, sizeof(Position) * cPos );`
			`}`

			`Hit::~Hit()`
			`{`
			`delete _aPos;`
			`}`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: HitIter::GetPositionCount, public`
			`//`
			`// Synopsis: return number of positions or zero`
			`//`
			`//--------------------------------------------------------------------------`

			`int HitIter::GetPositionCount() const`
			`{`
			`if (_iHit < _pDoc->_cHit && _pDoc->_aHit[_iHit])`
			`return _pDoc->_aHit[_iHit]->Count();`

			`return 0;`
			`}`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: HitIter::GetPosition, public`
			`//`
			`// Synopsis: return position by value`
			`//`
			`//--------------------------------------------------------------------------`

			`Position HitIter::GetPosition ( int i ) const`
			`{`
			`if ( _iHit < _pDoc->_cHit && _pDoc->_aHit[_iHit] )`
			`return _pDoc->_aHit[_iHit]->GetPos(i);`
			`else`
			`{`
			`Position pos;`
			`return( pos );`
			`}`
			`}`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Document::Document, public`
			`//`
			`// Synopsis: Initialize document with filename`
			`//`
			`//--------------------------------------------------------------------------`

			`Document::Document(WCHAR const* filename, LONG rank, BOOL fDelete)`
			`: _filename(0),`
			`_rank (rank),`
			`_buffer(0),`
			`_bufLen(0),`
			`_bufEnd(0),`
			`_pFilter(0),`
			`_aParaOffset(0),`
			`_isInit(FALSE),`
			`_cHit(0),`
			`_aParaLine(0),`
			`_maxParaLen(0),`
			`_cPara(0),`
			`_chunkCount(0),`
			`_fDelete( fDelete )`
			`{`
			`_filename = new WCHAR[ wcslen( filename ) + 1 ];`
			`wcscpy( _filename, filename );`
			`}`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Document::Document, public`
			`//`
			`// Synopsis: Initialize document`
			`//`
			`//--------------------------------------------------------------------------`

			`Document::Document()`
			`: _filename(0),`
			`_buffer(0),`
			`_bufLen(0),`
			`_bufEnd(0),`
			`_pFilter(0),`
			`_aParaOffset(0),`
			`_isInit(FALSE),`
			`_cHit(0),`
			`_aParaLine(0),`
			`_maxParaLen(0),`
			`_cPara(0),`
			`_chunkCount(0),`
			`_fDelete( FALSE )`
			`{}`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Document::~Document, public`
			`//`
			`// Synopsis: Free document`
			`//`
			`//--------------------------------------------------------------------------`

			`Document::~Document()`
			`{`
			`Free();`
			`}`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Document::Free, public`
			`//`
			`// Synopsis: Free document storage`
			`//`
			`//--------------------------------------------------------------------------`

			`void Document::Free()`
			`{`
			`if ( 0 != _filename )`
			`{`
			`if ( _fDelete )`
			`DeleteFile( _filename );`

			`delete [] _filename;`
			`}`

			`if (!_isInit)`
			`return;`

			`for ( unsigned i = 0; i < _cHit; i++ )`
			`{`
			`delete _aHit[i];`
			`_aHit[i] = 0;`
			`}`

			`// _aHit is embedded`

			`delete []_aParaOffset;`
			`_aParaOffset = 0;`

			`if (_aParaLine)`
			`{`
			`for (int i = 0; i < _cPara; i++)`
			`{`
			`while (_aParaLine[i].next != 0)`
			`{`
			`ParaLine* p = _aParaLine[i].next;`
			`_aParaLine[i].next = _aParaLine[i].next->next;`
			`delete p;`
			`}`
			`}`
			`delete _aParaLine;`
			`}`

			`delete _buffer;`

			`_buffer = 0;`

			`_bufEnd = 0;`
			`_cHit = 0;`

			`_isInit = FALSE;`
			`} //Free`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Document::Init, public`
			`//`
			`// Synopsis: Read-in file, fill array of hits`
			`//`
			`//--------------------------------------------------------------------------`

			`SCODE Document::Init(ISearchQueryHits *pSearch)`
			`{`
			`BOOL noHits = FALSE;`

			`SCODE sc = S_OK;`

			`TRY`
			`{`
			`AllocBuffer( _filename );`
			`BindToFilter( _filename );`

			`ULONG ulFlags;`
			`sc = _pFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS \|`
			`IFILTER_INIT_CANON_HYPHENS \|`
			`IFILTER_INIT_APPLY_INDEX_ATTRIBUTES,`
			`0, 0, &ulFlags );`

			`if (FAILED (sc))`
			`THROW (CException(sc));`

			`ReadFile();`

			`BreakParas();`

			`if (Paras() != 0)`
			`{`
			`BreakLines();`

			`#if 0`
			`// some filters don't behave correctly if you just re-init them,`
			`// so release the filter and re-open it.`

			`_pFilter->Release();`
			`_pFilter = 0;`
			`BindToFilter();`
			`#endif`

			`sc = _pFilter->Init ( IFILTER_INIT_CANON_PARAGRAPHS \|`
			`IFILTER_INIT_CANON_HYPHENS \|`
			`IFILTER_INIT_APPLY_INDEX_ATTRIBUTES,`
			`0, 0, &ulFlags );`
			`sc = TheSearch->Init( _pFilter, ulFlags );`

			`if (FAILED (sc))`
			`{`
			`if ( QUERY_E_ALLNOISE != sc )`
			`THROW (CException(sc));`
			`// we can still show the file`

			`sc = S_OK;`
			`noHits = TRUE;`
			`}`

			`// SUCCESS`
			`_isInit = TRUE;`
			`}`
			`}`
			`CATCH ( CException, e )`
			`{`
			`_isInit = FALSE;`
			`sc = e.GetErrorCode();`
			`}`
			`END_CATCH;`

			`if (!noHits)`
			`{`
			`//`
			`// pull up all the hits`
			`//`

			`ULONG count;`
			`FILTERREGION* aRegion;`
			`SCODE sc = TheSearch->NextHitOffset ( &count, &aRegion );`

			`while (sc == S_OK)`
			`{`
			`XCoMem<FILTERREGION> xRegion( aRegion );`

			`CDynArrayInPlace<Position> aPos( count );`

			`for (unsigned i = 0; i < count; i++)`
			`aPos [i] = RegionToPos ( aRegion [i] );`

			`xRegion.Free();`

			`XPtr<Hit> xHit( new Hit( aPos.GetPointer(), count ) );`

			`_aHit[_cHit] = xHit.Get();`
			`_cHit++;`
			`xHit.Acquire();`

			`sc = TheSearch->NextHitOffset ( &count, &aRegion );`
			`}`
			`}`
			`else`
			`{`
			`_cHit = 0;`
			`_isInit = (_bufEnd - _buffer) != 0;`
			`}`

			`if ( _pFilter )`
			`{`
			`_pFilter->Release();`
			`_pFilter = 0;`
			`}`

			`return _isInit ? S_OK : sc;`
			`}`

			`Position Document::RegionToPos ( FILTERREGION& region )`
			`{`
			`static int paraHint = 0;`
			`static int iChunkHint = 0;`
			`static Position posNull;`

			`ULONG offset = ULONG (-1);`

			`// translate region to offset into buffer`
			`if (iChunkHint >= _chunkCount \|\| _chunk[iChunkHint].ChunkId() != region.idChunk )`
			`{`
			`iChunkHint = 0;`

			`while ( iChunkHint < _chunkCount && _chunk[iChunkHint].ChunkId() < region.idChunk )`
			`{`
			`iChunkHint++;`
			`}`

			`if (iChunkHint >= _chunkCount \|\| _chunk[iChunkHint].ChunkId() != region.idChunk)`
			`return posNull;`
			`}`

			`Win4Assert ( iChunkHint < _chunkCount );`
			`Win4Assert ( _chunk[iChunkHint].ChunkId() == region.idChunk );`

			`offset = _chunk[iChunkHint].Offset() + region.cwcStart;`

			`if (paraHint >= _cPara \|\| _aParaOffset[paraHint] > offset )`
			`paraHint = 0;`

			`Win4Assert ( _aParaOffset[paraHint] <= offset );`

			`for ( ; paraHint <= _cPara; paraHint++)`
			`{`
			`// _aParaOffset[_cPara] is valid!`

			`if (_aParaOffset[paraHint] > offset)`
			`{`
			`Win4Assert (paraHint > 0);`
			`paraHint--;`
			`return Position ( paraHint,`
			`offset - _aParaOffset[paraHint],`
			`region.cwcExtent );`
			`}`
			`}`

			`return posNull;`
			`}`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Document::AllocBuffer, public`
			`//`
			`// Synopsis: Allocate buffer for file text`
			`//`
			`//--------------------------------------------------------------------------`

			`void Document::AllocBuffer ( WCHAR const * pwcPath )`
			`{`
			`//`
			`// We should keep allocating buffers on demand,`
			`// but for this simple demo we'll just get the`
			`// file size up front and do a single buffer`
			`// allocation of 2.25 the size (to accommodate`
			`// Unicode expansion). THIS IS JUST A DEMO!`
			`//`

			`HANDLE hFile = CreateFile ( pwcPath,`
			`GENERIC_READ,`
			`FILE_SHARE_READ,`
			`0, // security`
			`OPEN_EXISTING,`
			`FILE_ATTRIBUTE_NORMAL,`
			`0 ); // template`

			`if ( INVALID_HANDLE_VALUE == hFile )`
			`THROW( CException() );`

			`_bufLen = GetFileSize(hFile, 0 );`
			`CloseHandle ( hFile );`

			`// Unicode from ASCII, twice and then some`

			`_bufLen = 2 * _bufLen + _bufLen / 4 + 1;`

			`_buffer = new WCHAR [_bufLen + 1];`
			`_buffer[ _bufLen ] = 0;`
			`}`

			`typedef HRESULT (__stdcall * PFnLoadTextFilter)( WCHAR const * pwcPath,`
			`IFilter ** ppIFilter );`

			`PFnLoadTextFilter g_pLoadTextFilter = 0;`

			`SCODE MyLoadTextFilter( WCHAR const pwc, IFilter *ppFilter )`
			`{`
			`if ( 0 == g_pLoadTextFilter )`
			`{`
			`g_pLoadTextFilter = (PFnLoadTextFilter) GetProcAddress( GetModuleHandle( L"query.dll" ), "LoadTextFilter" );`

			`if ( 0 == g_pLoadTextFilter )`
			`return HRESULT_FROM_WIN32( GetLastError() );`
			`}`

			`return g_pLoadTextFilter( pwc, ppFilter );`
			`}`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Document::BindToFilter, public`
			`//`
			`// Synopsis: Bind to appropriate filter for the document`
			`//`
			`//--------------------------------------------------------------------------`

			`void Document::BindToFilter( WCHAR const * pwcPath )`
			`{`
			`//`
			`// Bind to the filter interface`
			`//`

			`SCODE sc = LoadIFilter( pwcPath, 0, (void **)&_pFilter );`

			`if ( FAILED(sc) )`
			`{`
			`sc = MyLoadTextFilter( pwcPath, &_pFilter );`
			`if ( FAILED(sc) )`
			`THROW( CException(sc) );`
			`}`
			`}`

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Document::ReadFile, public`
			`//`
			`// Synopsis: Read file into buffer using the filter`
			`//`
			`//--------------------------------------------------------------------------`

			`void Document::ReadFile ()`
			`{`
			`SCODE sc;`
			`ULONG lenSoFar = 0;`
			`int cChunk = 0;`
			`BOOL fSeenProp = FALSE;`

			`STAT_CHUNK statChunk;`
			`sc = _pFilter->GetChunk ( &statChunk );`

			`// what about all these glueing flags?`
			`// Take them into account at some point`
			`// to test more complicated chunking`

			`while (SUCCEEDED(sc)`
			`\|\| FILTER_E_LINK_UNAVAILABLE == sc`
			`\|\| FILTER_E_EMBEDDING_UNAVAILABLE == sc )`
			`{`

			`if ( SUCCEEDED( sc ) && (statChunk.flags & CHUNK_TEXT) )`
			`{`
			`// read the contents only`

			`if ( statChunk.attribute.guidPropSet == guidStorage &&`
			`statChunk.attribute.psProperty.ulKind == PRSPEC_PROPID &&`
			`statChunk.attribute.psProperty.propid == PID_STG_CONTENTS )`
			`{`
			`if ( statChunk.breakType != CHUNK_NO_BREAK )`
			`{`
			`switch( statChunk.breakType )`
			`{`
			`case CHUNK_EOW:`
			`case CHUNK_EOS:`
			`_buffer[lenSoFar++] = L' ';`
			`break;`
			`case CHUNK_EOP:`
			`case CHUNK_EOC:`
			`_buffer[lenSoFar++] = UNICODE_PARAGRAPH_SEPARATOR;`
			`break;`
			`}`
			`}`

			`_chunk [cChunk].SetChunkId (statChunk.idChunk);`
			`Win4Assert ( cChunk == 0 \|\| statChunk.idChunk > _chunk [cChunk - 1].ChunkId () );`
			`_chunk [cChunk].SetOffset (lenSoFar);`
			`cChunk++;`

			`do`
			`{`
			`ULONG lenThis = _bufLen - lenSoFar;`
			`if (lenThis == 0)`
			`break;`

			`sc = _pFilter->GetText( &lenThis, _buffer+lenSoFar );`

			`// The buffer may be filled with zeroes. Nice filter.`

			`if ( SUCCEEDED(sc) && 0 != lenThis )`
			`{`
			`lenThis = __min( lenThis,`
			`wcslen( _buffer + lenSoFar ) );`
			`lenSoFar += lenThis;`
			`}`
			`}`
			`while (SUCCEEDED(sc));`
			`}`
			`} // if SUCCEEDED( sc )`

			`// next chunk, please`
			`sc = _pFilter->GetChunk ( &statChunk );`
			`}`

			`_bufEnd = _buffer + lenSoFar;`

			`Win4Assert( lenSoFar <= _bufLen );`

			`_chunkCount = cChunk;`
			`}`


			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Document::BreakParas, public`
			`//`
			`// Synopsis: Break document into paragraphs separated by line feeds`
			`//`
			`//--------------------------------------------------------------------------`

			`#define PARAS 25`

			`void Document::BreakParas()`
			`{`
			`int maxParas = PARAS;`
			`_aParaOffset = new unsigned [ maxParas ];`
			`WCHAR * pCur = _buffer;`
			`_cPara = 0;`
			`_maxParaLen = 0;`

			`do`
			`{`
			`if ( _cPara == maxParas )`
			`{`
			`// grow array`
			`unsigned * tmp = new unsigned [maxParas * 2];`
			`for ( int n = 0; n < maxParas; n++ )`
			`tmp[n] = _aParaOffset[n];`
			`delete []_aParaOffset;`
			`_aParaOffset = tmp;`
			`maxParas *= 2;`
			`}`
			`_aParaOffset [_cPara] = (UINT)(pCur - _buffer);`

			`pCur = EatPara(pCur);`

			`_cPara++;`

			`} while ( pCur < _bufEnd );`

			`// store end of buffer offset as _aParaOffset[_cPara]`

			`if ( _cPara == maxParas )`
			`{`
			`// grow array`
			`unsigned * tmp = new unsigned [maxParas + 1];`
			`for ( int n = 0; n < maxParas; n++ )`
			`tmp[n] = _aParaOffset[n];`
			`delete []_aParaOffset;`
			`_aParaOffset = tmp;`
			`maxParas += 1;`
			`}`

			`_aParaOffset [_cPara] = (UINT)(pCur - _buffer - 1);`
			`}`

			//+-------------------------------------------------------------------------
			//
			//  Member:     Document::EatPara, private
			//
			//  Synopsis:   Skip over one paragraph
			//
			//  Arguments:  [pCur] -- where the paragraph starts
			//
			//  Returns:    Pointer just past the paragraph terminator.  A paragraph
			//              ends at a line feed, carriage return, NUL, Unicode
			//              paragraph separator, or at _bufEnd.  A carriage return
			//              immediately followed by a line feed is consumed as a
			//              single terminator.  When the paragraph is ended by
			//              _bufEnd the result is _bufEnd + 1.
			//
			//  Notes:      Raises _maxParaLen if this paragraph (terminator
			//              excluded) is longer than any seen so far.
			//
			//--------------------------------------------------------------------------

			WCHAR * Document::EatPara( WCHAR * pCur )
			{
			    WCHAR * const pwcStart = pCur;

			    // advance to the terminator (or the end of the buffer)

			    for ( ; pCur < _bufEnd; pCur++ )
			    {
			        const int wc = *pCur;

			        if ( L'\n' == wc ||
			             L'\r' == wc ||
			             L'\0' == wc ||
			             UNICODE_PARAGRAPH_SEPARATOR == wc )
			            break;
			    }

			    const int cwcPara = (int)(pCur - pwcStart);

			    // step over the terminator; take the LF of a CR/LF pair with it

			    WCHAR * pwcNext = pCur + 1;

			    if ( pwcNext < _bufEnd && L'\r' == *pCur && L'\n' == *pwcNext )
			        pwcNext++;

			    if ( cwcPara > _maxParaLen )
			        _maxParaLen = cwcPara;

			    return pwcNext;
			}

			//+-------------------------------------------------------------------------
			//
			//  Function:   BreakLine
			//
			//  Synopsis:   Decide how many characters of a text run go on one line
			//
			//  Arguments:  [buf]    -- start of the text
			//              [cwcBuf] -- characters available in buf
			//              [cwcMax] -- preferred maximum line length, must be > 0
			//
			//  Returns:    cwcBuf if the text fits.  Otherwise the length up to and
			//              including the last space or tab within the first cwcMax
			//              characters.  If there is none, the index of the first
			//              space or tab at or after cwcMax, or cwcBuf if no such
			//              character exists.
			//
			//--------------------------------------------------------------------------

			int BreakLine ( WCHAR* buf, int cwcBuf, int cwcMax )
			{
			    if ( cwcBuf <= cwcMax )
			        return cwcBuf;

			    Win4Assert ( cwcMax > 0 );

			    // look backwards for whitespace

			    for ( int cwcBack = cwcMax; cwcBack >= 1; cwcBack-- )
			    {
			        const WCHAR wcBack = buf[cwcBack - 1];

			        if ( L' ' == wcBack || L'\t' == wcBack )
			            return cwcBack;
			    }

			    // a single word larger than screen width
			    // try scanning forward

			    int cwcFwd = cwcMax;

			    while ( cwcFwd < cwcBuf && L' ' != buf[cwcFwd] && L'\t' != buf[cwcFwd] )
			        cwcFwd++;

			    return cwcFwd;
			}

			`const int MAX_LINE_LEN = 110;`

			`void Document::BreakLines()`
			`{`
			`_aParaLine = new ParaLine [_cPara];`
			`for (int i = 0; i < _cPara; i++)`
			`{`
			`int cwcLeft = _aParaOffset[i+1] - _aParaOffset[i];`

			`if (cwcLeft < MAX_LINE_LEN)`
			`_aParaLine[i].offEnd = cwcLeft;`
			`else`
			`{`
			`ParaLine* pParaLine = &_aParaLine[i];`
			`WCHAR* buf = _buffer + _aParaOffset[i];`
			`int cwcOffset = 0;`

			`for (;;)`
			`{`
			`int cwcLine = BreakLine ( buf + cwcOffset, cwcLeft, MAX_LINE_LEN );`
			`cwcOffset += cwcLine;`
			`pParaLine->offEnd = cwcOffset;`
			`cwcLeft -= cwcLine;`
			`if (cwcLeft == 0)`
			`break;`
			`pParaLine->next = new ParaLine;`
			`pParaLine = pParaLine->next;`
			`};`
			`}`
			`}`
			`}`

			//+-------------------------------------------------------------------------
			//
			//  Member:     Document::GetLine, public
			//
			//  Arguments:  [nPara] -- paragraph number
			//              [off]   -- offset within paragraph
			//              [cwc]   -- in/out chars to copy / copied
			//              [buf]   -- target buffer
			//
			//  Synopsis:   Copy text from paragraph to buffer
			//
			//  Returns:    FALSE if nPara does not name a paragraph, TRUE otherwise.
			//              On TRUE, cwc is the number of characters copied: the
			//              requested count limited to what lies between off and the
			//              start of the next paragraph, and never negative (zero
			//              when off is at or beyond that point).
			//
			//--------------------------------------------------------------------------


			BOOL Document::GetLine(int nPara, int off, int& cwc, WCHAR* buf)
			{
			    Win4Assert (_buffer != 0);

			    if ( nPara < 0 || nPara >= _cPara )
			        return FALSE;

			    // _aParaOffset [_cPara] is the offset of the end of buffer
			    int cwcPara = _aParaOffset[nPara+1] - (_aParaOffset[nPara] + off);

			    // An offset past the end of the paragraph yields a negative count;
			    // handing that to memcpy would turn it into an enormous size.

			    if ( cwcPara < 0 )
			        cwcPara = 0;

			    cwc = __min ( cwc, cwcPara );

			    if ( cwc < 0 )
			        cwc = 0;

			    if ( cwc > 0 )
			    {
			        const WCHAR * pText = _buffer + _aParaOffset[nPara] + off;
			        memcpy ( buf, pText, cwc * sizeof(WCHAR) );
			    }

			    return TRUE;
			}

			`//+-------------------------------------------------------------------------`
			`//`
			`// Member: Document::GetWord, public`
			`//`
			`// Synopsis:`
			`// Copy the string into buffer`
			`//`
			`//--------------------------------------------------------------------------`

			`void Document::GetWord(int nPara, int offSrc, int cwcSrc, WCHAR* buf)`
			`{`
			`Win4Assert (_buffer != 0);`
			`Win4Assert ( nPara < _cPara );`

			`WCHAR * p = _buffer + _aParaOffset[nPara];`

			`Win4Assert ( p + offSrc + cwcSrc <= _bufEnd );`

			`memcpy ( buf, p + offSrc, cwcSrc * sizeof(WCHAR));`
			`}`