664 lines
18 KiB
C++
664 lines
18 KiB
C++
//+---------------------------------------------------------------------------
|
|
//
|
|
// Microsoft Windows
|
|
// Copyright (C) Microsoft Corporation, 1991 - 2000.
|
|
//
|
|
// File: NOISE.CXX
|
|
//
|
|
// Contents: Noise list
|
|
//
|
|
// Classes: CNoiseList, NoiseListInit, NoiseListEmpty
|
|
// CLString, CStringList, CStringTable
|
|
//
|
|
// History: 11-Jul-91 BartoszM Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
#include <pch.cxx>
|
|
#pragma hdrstop
|
|
|
|
#include <noise.hxx>
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CLString::CLString, public
|
|
//
|
|
// Synopsis: Initializes and links a string list element
|
|
//
|
|
// Arguments: [cb] -- length
|
|
// [buf] -- string
|
|
// [next] -- next link in the chain
|
|
//
|
|
// History: 16-Jul-91 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
CLString::CLString ( UINT cb, const BYTE* buf, CLString* next )
{
    // Link this element in front of the supplied chain and record the
    // caller-supplied length.
    _next = next;
    _cb = cb;

    // Number of bytes actually copied into _buf. Builds with CIDBG == 1
    // copy one byte more than the recorded length.
    UINT cbCopy = cb;
#if CIDBG == 1
    cbCopy++;
#endif

    memcpy ( _buf, buf, cbCopy );
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CLString::operator new, public
|
|
//
|
|
// Synopsis: Allocates a string list element
|
|
//
|
|
// Arguments: [n] -- size of class instance
|
|
// [cb] -- length of string buffer needed
|
|
//
|
|
// History: 10 Apr 96 AlanW Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
void *
|
|
CLString::operator new ( size_t n, UINT cb )
|
|
{
|
|
#if CIDBG == 1
|
|
cb++;
|
|
#endif
|
|
return new BYTE [n+cb];
|
|
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CStringList::~CStringList, public
|
|
//
|
|
// Synopsis: Free linked list
|
|
//
|
|
// History: 16-Jul-91 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
CStringList::~CStringList()
{
    // Pop elements off the front of the list one at a time. _head is
    // advanced past an element before that element is deleted.
    for ( CLString * pDoomed = _head; pDoomed != 0; pDoomed = _head )
    {
        _head = pDoomed->Next();
        delete pDoomed;
    }
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CStringList::Add, public
|
|
//
|
|
// Synopsis: Adds a string to list
|
|
//
|
|
// Arguments: [cb] -- length
|
|
// [str] -- string
|
|
//
|
|
// History: 16-Jul-91 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CStringList::Add ( UINT cb, const BYTE * str )
{
    // Allocate a new element (with room for cb bytes of string) that links
    // to the current head, then make it the new head of the list.
    CLString * pNew = new (cb) CLString ( cb, str, _head );
    _head = pNew;
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CStringList::Find, public
|
|
//
|
|
// Synopsis: Returns TRUE if string found in the list, FALSE otherwise
|
|
//
|
|
// Arguments: [cb] -- length
|
|
// [str] -- string
|
|
//
|
|
// History: 16-Jul-91 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
BOOL CStringList::Find ( UINT cb, const BYTE* str ) const
{
    // Walk the chain from the head; stop at the first element that
    // compares equal to the given (length, bytes) pair.
    for ( CLString * pCur = _head; pCur != 0; pCur = pCur->Next() )
    {
        if ( pCur->Equal ( cb, str ) )
            return TRUE;
    }

    // Reached the end of the list without a match.
    return FALSE;
}
|
|
|
|
#if CIDBG == 1
|
|
|
|
void CStringList::Dump () const
|
|
{
|
|
CLString * p = _head;
|
|
while ( p )
|
|
{
|
|
p->Dump();
|
|
p = p->Next();
|
|
}
|
|
ciDebugOut (( DEB_ITRACE, "\n" ));
|
|
}
|
|
|
|
#endif // CIDBG == 1
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CStringTable::CStringTable, public
|
|
//
|
|
// Synopsis: Create hash table of given size
|
|
//
|
|
// Arguments: [size] -- size
|
|
//
|
|
// History: 16-Jul-91 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
CStringTable::CStringTable( UINT size )
{
    // Remember the bucket count, then allocate one (initially
    // default-constructed) string list per bucket.
    _size = size;

    _bucket = new CStringList[size];
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CStringTable::~CStringTable, public
|
|
//
|
|
// Synopsis: Free linked lists
|
|
//
|
|
// History: 16-Jul-91 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
CStringTable::~CStringTable()
{
    // Array delete runs ~CStringList on every bucket, which in turn frees
    // each bucket's chain of elements.
    delete [] _bucket;
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CStringTable::Add, public
|
|
//
|
|
// Synopsis: Add a string to hash table
|
|
//
|
|
// Arguments: [cb] -- size
|
|
// [str] -- string
|
|
// [hash] -- precomputed hash value
|
|
//
|
|
// History: 16-Jul-91 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CStringTable::Add ( UINT cb, const BYTE* str, UINT hash )
{
    // Map the precomputed hash onto a bucket and add the string to that
    // bucket's list.
    CStringList & bucket = _bucket[_index(hash)];
    bucket.Add ( cb, str );
}
|
|
|
|
#if CIDBG == 1
|
|
|
|
void CStringTable::Dump () const
|
|
{
|
|
for ( unsigned i = 0; i < _size; i++ )
|
|
{
|
|
if ( !_bucket[i].IsEmpty() )
|
|
{
|
|
ciDebugOut (( DEB_ITRACE, "%3d: ", i ));
|
|
_bucket[i].Dump();
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif // CIDBG == 1
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseList::CNoiseList
|
|
//
|
|
// Synopsis: constructor for noise list
|
|
//
|
|
// Effects: gets buffers from key repository
|
|
//
|
|
// Arguments: [krep] -- key repository to give words to.
|
|
//
|
|
// History: 05-June-91 t-WadeR Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
CNoiseList::CNoiseList( const CStringTable& table, PKeyRepository& krep )
    : _krep(krep),
      _table(table),
      _cNoiseWordsSkipped(0),
      _cNonNoiseAltWords(0),
      _fFoundNoise( FALSE )
{
    // Ask the key repository for its size, buffer and occurrence pointers.
    krep.GetBuffers( &_pcbOutBuf, &_pbOutBuf, &_pocc );

    // Save the size reported at construction time; GetBuffers() writes it
    // back into *_pcbOutBuf on every call.
    _cbMaxOutBuf = *_pcbOutBuf;
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseList::GetBuffers
|
|
//
|
|
// Synopsis: Returns address of normalizer's input buffers
|
|
//
|
|
// Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer
|
|
// [ppbInBuf] -- pointer to pointer to receive address of buffer
|
|
//
|
|
// History: 05-June-91 t-WadeR Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CNoiseList::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
{
    // There is no separate input buffer: the caller is handed the key
    // repository's output buffer directly.

    // Restore the size saved at construction before exposing the count.
    *_pcbOutBuf = _cbMaxOutBuf;

    *ppcbInBuf = _pcbOutBuf;
    *ppbInBuf = _pbOutBuf;
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseList::GetFlags
|
|
//
|
|
// Synopsis: Returns address of ranking and range flags
|
|
//
|
|
// Arguments: [ppRange] -- range flag
|
|
// [ppRank] -- rank flag
|
|
//
|
|
// History: 11-Feb-92 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CNoiseList::GetFlags ( BOOL** ppRange, CI_RANK** ppRank )
{
    // The noise list keeps no flags of its own; forward the request
    // unchanged to the key repository.
    _krep.GetFlags ( ppRange, ppRank );
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseList::PutWord
|
|
//
|
|
// Synopsis: If word isn't a noise word, passes it to the key repository
|
|
//
|
|
// Effects: calls _krep.PutKey
|
|
//
|
|
// Arguments: [hash] -- precomputed hash value
|
|
//
|
|
// History: 05-June-91 t-WadeR Created stub.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CNoiseList::PutWord ( UINT hash )
{
    // Look the current word (held in the output buffer) up in the noise
    // table using the precomputed hash.
    if ( !_table.Find ( *_pcbOutBuf, _pbOutBuf, hash ) )
    {
        //
        // Not a noise word: hand it to the key repository. The skipped
        // count passed along covers noise words at previous occurrences
        // only, and is reset once reported.
        //
        _krep.PutKey( _cNoiseWordsSkipped );
        _cNoiseWordsSkipped = 0;
    }
    else
    {
        _fFoundNoise = TRUE;

        //
        // If every alternate word at the current occurrence was a noise
        // word, that is equivalent to a single noise word at this
        // occurrence, so count one skipped noise word.
        //
        if ( _cNonNoiseAltWords == 0 )
            _cNoiseWordsSkipped++;
    }

    // Start afresh for the alternate words of the next occurrence.
    _cNonNoiseAltWords = 0;

    // Advance the occurrence counter.
    (*_pocc)++;
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseList::PutAltWord
|
|
//
|
|
// Synopsis: If word isn't a noise word, passes it to the key repository
|
|
//
|
|
// Effects: calls _krep.PutKey
|
|
//
|
|
// Arguments: [hash] -- precomputed hash value
|
|
//
|
|
// History: 03-May-95 SitaramR Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CNoiseList::PutAltWord ( UINT hash )
{
    // A noise word is only flagged here; the skipped-word accounting for
    // the occurrence is done by PutWord.
    if ( _table.Find ( *_pcbOutBuf, _pbOutBuf, hash ) )
    {
        _fFoundNoise = TRUE;
        return;
    }

    //
    // This is not the last of a sequence of alternate words, so count it
    // as a non-noise word at the current occurrence.
    //
    _cNonNoiseAltWords++;

    //
    // Hand the word to the key repository. The skipped count passed along
    // covers noise words at previous occurrences only.
    //
    _krep.PutKey( _cNoiseWordsSkipped );
    _cNoiseWordsSkipped = 0;
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseList::StartAltPhrase
|
|
//
|
|
// History: 29-Nov-94 SitaramR Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
void CNoiseList::StartAltPhrase()
|
|
{
|
|
_krep.StartAltPhrase( _cNoiseWordsSkipped );
|
|
_cNoiseWordsSkipped = 0;
|
|
}
|
|
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseList::EndAltPhrase
|
|
//
|
|
// History: 29-Nov-94 SitaramR Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
void CNoiseList::EndAltPhrase()
|
|
{
|
|
_krep.EndAltPhrase( _cNoiseWordsSkipped );
|
|
_cNoiseWordsSkipped = 0;
|
|
}
|
|
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseListInit::CNoiseListInit
|
|
//
|
|
// Synopsis: Creates a hash table to be filled
|
|
//
|
|
// Arguments: [size] -- size of the hash table (possibly prime #)
|
|
//
|
|
// History: 15-Jul-91 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
CNoiseListInit::CNoiseListInit ( UINT size )
{
    // Create the (initially empty) hash table that PutWord will fill.
    _table = new CStringTable ( size );

    END_CONSTRUCTION( CNoiseListInit );
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseListInit::GetBuffers
|
|
//
|
|
// Synopsis: Returns address of repository's input buffers
|
|
//
|
|
// Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer
|
|
// [ppbInBuf] -- pointer to pointer to receive address of buffer
|
|
//
|
|
// History: 15-Jul-91 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
void CNoiseListInit::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
{
    // Set the key's count to MAXKEYSIZE before exposing it.
    _key.SetCount(MAXKEYSIZE);

    // The caller receives the address of the internal key's count and its
    // writable buffer.
    *ppcbInBuf = _key.GetCountAddress();
    *ppbInBuf = _key.GetWritableBuf();
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseListInit::PutWord
|
|
//
|
|
// Synopsis: Puts a key into the hash table
|
|
//
|
|
// Arguments: [hash] -- hash value
|
|
//
|
|
// History: 15-Jul-91 BartoszM Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CNoiseListInit::PutWord ( UINT hash )
{
    // Add the current contents of the internal key to the hash table,
    // using the hash value supplied by the caller.
    _table->Add ( _key.Count(),
                  _key.GetBuf(),
                  hash );
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseListInit::PutAltWord
|
|
//
|
|
// Synopsis: Puts a key into the hash table
|
|
//
|
|
// Arguments: [hash] -- hash value
|
|
//
|
|
// History: 03-May-95 SitaramR Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CNoiseListInit::PutAltWord ( unsigned hash )
|
|
{
|
|
PutWord( hash );
|
|
}
|
|
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseListEmpty::CNoiseListEmpty
|
|
//
|
|
// Synopsis: constructor for a default empty noise list
|
|
//
|
|
// Effects: gets buffers from key repository
|
|
//
|
|
// Arguments: [krep] -- key repository to give words to.
|
|
// [ulFuzzy] -- Fuzziness of query
|
|
//
|
|
// History: 16-Jul-91 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
CNoiseListEmpty::CNoiseListEmpty( PKeyRepository& krep, ULONG ulFuzzy )
    : _krep(krep),
      _ulGenerateMethod(ulFuzzy),
      _cNoiseWordsSkipped(0),
      _cNonNoiseAltWords(0),
      _fFoundNoise( FALSE )
{
    // Ask the key repository for its size, buffer and occurrence pointers.
    krep.GetBuffers( &_pcbOutBuf, &_pbOutBuf, &_pocc );

    // Save the size reported at construction time; GetBuffers() writes it
    // back into *_pcbOutBuf on every call.
    _cbMaxOutBuf = *_pcbOutBuf;
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseListEmpty::GetBuffers
|
|
//
|
|
// Synopsis: Returns address of normalizer's input buffers
|
|
//
|
|
// Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer
|
|
// [ppbInBuf] -- pointer to pointer to receive address of buffer
|
|
//
|
|
// History: 16-Jul-91 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CNoiseListEmpty::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
{
    // There is no separate input buffer: the caller is handed the key
    // repository's output buffer directly.

    // Restore the size saved at construction before exposing the count.
    *_pcbOutBuf = _cbMaxOutBuf;

    *ppcbInBuf = _pcbOutBuf;
    *ppbInBuf = _pbOutBuf;
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseListEmpty::GetFlags
|
|
//
|
|
// Synopsis: Returns address of ranking and range flags
|
|
//
|
|
// Arguments: [ppRange] -- range flag
|
|
// [ppRank] -- rank flag
|
|
//
|
|
// History: 11-Feb-92 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CNoiseListEmpty::GetFlags ( BOOL** ppRange, CI_RANK** ppRank )
{
    // The noise list keeps no flags of its own; forward the request
    // unchanged to the key repository.
    _krep.GetFlags ( ppRange, ppRank );
}
|
|
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseListEmpty::PutWord
|
|
//
|
|
// Synopsis: If word isn't a noise word, passes it to the key repository
|
|
//
|
|
// Effects: calls _krep.PutKey
|
|
//
|
|
// Arguments: [hash] -- hash value (ignored)
|
|
//
|
|
// History: 16-Jul-91 BartoszM Created
|
|
//
|
|
// Notes: Filters out one letter words, unless it is a prefix (*) query
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CNoiseListEmpty::PutWord ( UINT )
{
    //
    // The noise list is empty, but PutBreak() is still modeled as a skip
    // of the appropriate number of noise words, and one-letter words are
    // counted as noise words. The length is in bytes and includes a
    // 1 byte prefix. Prefix queries never treat a word as noise.
    //
    if ( _ulGenerateMethod == GENERATE_METHOD_PREFIX || *_pcbOutBuf > NOISE_WORD_LENGTH )
    {
        //
        // Not a noise word: hand it to the key repository. The skipped
        // count passed along covers noise words at previous occurrences
        // only, and is reset once reported.
        //
        _krep.PutKey( _cNoiseWordsSkipped );
        _cNoiseWordsSkipped = 0;
    }
    else
    {
        _fFoundNoise = TRUE;

        //
        // If every alternate word at the current occurrence was a noise
        // word, that is equivalent to a single noise word at this
        // occurrence, so count one skipped noise word.
        //
        if ( _cNonNoiseAltWords == 0 )
            _cNoiseWordsSkipped++;
    }

    // Start afresh for the alternate words of the next occurrence.
    _cNonNoiseAltWords = 0;

    // Advance the occurrence counter.
    (*_pocc)++;
}
|
|
|
|
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseListEmpty::PutAltWord
|
|
//
|
|
// Synopsis: If word isn't a noise word, passes it to the key repository
|
|
//
|
|
// Effects: calls _krep.PutKey
|
|
//
|
|
// Arguments: [hash] -- precomputed hash value
|
|
//
|
|
// History: 03-May-95 SitaramR Created
|
|
//
|
|
// Notes: Filters out one letter words, unless it is a prefix (*) query
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CNoiseListEmpty::PutAltWord ( UINT hash )
{
    //
    // The noise list is empty, but PutBreak() is still modeled as a skip
    // of the appropriate number of noise words, and one-letter words are
    // counted as noise words. The length is in bytes and includes a
    // 1 byte prefix. Prefix queries never treat a word as noise.
    //
    if ( _ulGenerateMethod != GENERATE_METHOD_PREFIX && *_pcbOutBuf <= NOISE_WORD_LENGTH )
    {
        // Noise word: only flag it; nothing is passed on.
        _fFoundNoise = TRUE;
        return;
    }

    //
    // This is not the last of a sequence of alternate words, so count it
    // as a non-noise word at the current occurrence.
    //
    _cNonNoiseAltWords++;

    //
    // Hand the word to the key repository. The skipped count passed along
    // covers noise words at previous occurrences only.
    //
    _krep.PutKey( _cNoiseWordsSkipped );
    _cNoiseWordsSkipped = 0;
}
|
|
|
|
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseListEmpty::StartAltPhrase
|
|
//
|
|
// Synopsis: Pass on StartAltPhrase to key repository
|
|
//
|
|
// History: 20-Feb-95 SitaramR Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
void CNoiseListEmpty::StartAltPhrase()
|
|
{
|
|
_krep.StartAltPhrase( _cNoiseWordsSkipped );
|
|
_cNoiseWordsSkipped = 0;
|
|
}
|
|
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CNoiseListEmpty::EndAltPhrase
|
|
//
|
|
// Synopsis: Pass on EndAltPhrase to key repository
|
|
//
|
|
// History: 20-Feb-95 SitaramR Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
void CNoiseListEmpty::EndAltPhrase()
|
|
{
|
|
_krep.EndAltPhrase( _cNoiseWordsSkipped );
|
|
_cNoiseWordsSkipped = 0;
|
|
}
|
|
|
|
|