windows-nt/Source/XPSP1/NT/windows/richedit/re30/hash.cpp
2020-09-26 16:20:57 +08:00

287 lines
6.5 KiB
C++

/*
* @doc INTERNAL
*
* @module HASH.C -- RTF control word cache |
* #ifdef'ed with RTF_HASHCACHE
*
* Owner: <nl>
* Jon Matousek <nl>
*
* History: <nl>
* 8/15/95 jonmat first hash-cache for RTF using Brent's Method.
*/
#include "_common.h"
// Everything below is compiled only when RTF_HASHCACHE is defined.
#ifdef RTF_HASHCACHE
#include "hash.h"
ASSERTDATA
extern KEYWORD rgKeyword[]; // All of the RTF control words.
// Maximum number of leading keyword characters summed by HashKeyword_Key().
#define MAX_INAME 3
// One entry of the keyword hash table.
typedef struct {
const KEYWORD *token; // The keyword this entry refers to.
BOOL passBit; // Set by HashKeyword_Insert() when a probe sequence continued
              // past this slot; HashKeyword_Fetch() stops probing at the
              // first slot whose passBit is clear.
} HashEntry;
// The hash table itself: HASHSIZE slots, each NULL (empty) or pointing at an
// entry inside 'storage'.
static HashEntry *(hashtbl[HASHSIZE]);
static HashEntry *storage; // Dynamically alloc for cKeywords.
// Set to TRUE by HashKeyword_Init() once 'storage' has been allocated.
BOOL _rtfHashInited = FALSE;
// Forward declaration; defined after HashKeyword_Insert().
static INT HashKeyword_Key( const CHAR *szKeyword );
/*
 *	HashKeyword_Insert()
 *
 *	@func
 *		Insert a KEYWORD into the RTF hash table.
 *	@comm
 *		This function uses the % operator for MOD (instead of the MOD257
 *		macros used by HashKeyword_Fetch) in order to validate MOD257.
 *	@devnote
 *		Double hashing with Brent's method: while walking the new keyword's
 *		probe sequence looking for an empty slot, each resident entry that
 *		is in the way is also considered for relocation further along its
 *		OWN probe sequence. The cheapest arrangement (fewest total probes
 *		for the new keyword plus the relocated one) wins.
 *
 *		passBit is kept per slot: a set bit tells HashKeyword_Fetch that
 *		some probe sequence continues past that slot. Every slot a stored
 *		keyword's probe sequence crosses before reaching that keyword must
 *		therefore have its passBit set.
 *
 *		Each call consumes one element of 'storage', so it must not be
 *		called more times than 'storage' has elements.
 */
VOID HashKeyword_Insert (
	const KEYWORD *token )	//@parm pointer to KEYWORD token to insert.
{
	TRACEBEGIN(TRCSUBSYSDISP, TRCSCOPEINTERN, "HashKeyword_Insert");

	INT			index, step, position,
				cost, source, sink, index1,
				step1, temp;
	INT			hashKey;
	INT			stepMoved = 0;		// Probe step of the relocation candidate.
	BOOL		tmpPassBit;
	static INT	totalKeys = 0;		// Number of entries handed out so far.
	CHAR *		szKeyword;
	HashEntry *	np;

	AssertSz ( _rtfHashInited, "forgot to call HashKeyword_Init()");
	AssertSz ( totalKeys <= HASHSIZE * 0.7, "prime not large enough to hold total keys");

	szKeyword = token->szKeyword;

	// Take the next unused entry from the preallocated pool.
	np = &storage[totalKeys++];
	np->token = token;

	// Compute the key once; both the home slot and the step derive from it.
	hashKey = HashKeyword_Key(szKeyword);
	index = hashKey % HASHSIZE;						// Home slot.
	step = 1 + (hashKey % (HASHSIZE-1));			// Probe step, 1..HASHSIZE-1.

	position = 1;
	cost = HASHSIZE;								// The max collisions for any.
	source = sink = index;							// Defensive; always reassigned below.

	while(hashtbl[index] != NULL)					// Find empty slot.
	{
		position++;									// How many collisions.

		// For the keyword stored here, calc # of extra probes needed to
		// reach the first empty slot on ITS probe sequence.
		temp = 1;
		step1 = 1 + (HashKeyword_Key(hashtbl[index]->token->szKeyword) % (HASHSIZE-1));
		index1 = (index + step1) % HASHSIZE;
		while(hashtbl[index1] != NULL)
		{
			index1 = (index1 + step1) % HASHSIZE;
			temp++;
		}

		// Incremental cost computation, minimizes average # of collisions
		// for both keywords.
		if (cost > position + temp)
		{
			source = index;
			sink = index1;
			stepMoved = step1;
			cost = position + temp;
		}

		// There will be something stored beyond here, set the passBit.
		hashtbl[index]->passBit = 1;

		// Next index to search for empty slot.
		index = (index + step) % HASHSIZE;
	}

	if (position <= cost)
	{
		// Cheapest option: drop the new keyword into the empty slot found.
		source = sink = index;
		cost = position;
	}
	else
	{
		// The resident at 'source' is being relocated to 'sink'. Any occupied
		// slots it skips on the way must be flagged, otherwise
		// HashKeyword_Fetch would stop at the first one with a clear passBit
		// and never reach the relocated keyword. ('source' itself was flagged
		// by the loop above.) Extra pass bits only cost extra probes; they
		// never change a lookup result.
		for (index1 = (source + stepMoved) % HASHSIZE;
			 index1 != sink;
			 index1 = (index1 + stepMoved) % HASHSIZE)
		{
			hashtbl[index1]->passBit = 1;
		}
	}

	hashtbl[sink] = hashtbl[source];
	hashtbl[source] = np;

	// passBit belongs to the slot, not to the entry that happens to live in
	// it, so after moving entries the bits are exchanged to put them back
	// with their slots. When source == sink this is a self-swap (no-op).
	if (hashtbl[sink] && hashtbl[source])
	{
		tmpPassBit = hashtbl[sink]->passBit;
		hashtbl[sink]->passBit = hashtbl[source]->passBit;
		hashtbl[source]->passBit = tmpPassBit;
	}
}
/*
 *	static HashKeyword_Key()
 *
 *	@func
 *		Compute the raw hash key of a keyword.
 *	@comm
 *		The key is the sum of the (unsigned) values of at most the first
 *		MAX_INAME characters; shorter strings stop at the terminating 0.
 *	@rdesc
 *		The hash key from which callers derive both the table index and
 *		the probe step.
 */
static INT HashKeyword_Key(
	const CHAR *szKeyword )	//@parm C string to create hash key for.
{
	TRACEBEGIN(TRCSUBSYSDISP, TRCSCOPEINTERN, "HashKeyword_Key");

	INT sum = 0;
	INT cch = 0;

	// Accumulate leading characters until the limit or end of string.
	while (cch < MAX_INAME && szKeyword[cch])
	{
		sum += (UCHAR) szKeyword[cch];
		cch++;
	}

	return sum;
}
/*
* HashKeyword_Fetch()
*
* @func
* Look up a KEYWORD with the given szKeyword.
* @devnote
* We have a hash table of size 257. This allows for
* the use of very fast routines to calculate a MOD 257.
* This gives us a significant increase in performance
* over a binary search.
* @rdesc
* A pointer to the KEYWORD, or NULL if not found.
*/
const KEYWORD *HashKeyword_Fetch (
const CHAR *szKeyword ) //@parm C string to search for.
{
TRACEBEGIN(TRCSUBSYSDISP, TRCSCOPEINTERN, "HashKeyword_Fetch");
INT index, step;
HashEntry * hashTblPtr;
BYTE * pchCandidate;
BYTE * pchKeyword;
INT nComp;
CHAR firstChar;
INT hashKey;
AssertSz( HASHSIZE == 257, "Remove custom MOD257.");
firstChar = *szKeyword;
hashKey = HashKeyword_Key(szKeyword); // For calc'ing 'index' and 'step'
//index = hashKey%HASHSIZE; // First entry to search.
index = MOD257(hashKey); // This formula gives us 18% perf.
hashTblPtr = hashtbl[index]; // Get first entry.
if ( hashTblPtr != NULL ) // Something there?
{
// Compare 2 C strings.
pchCandidate = (BYTE *)hashTblPtr->token->szKeyword;
if ( firstChar == *pchCandidate )
{
pchKeyword = (BYTE *)szKeyword;
while (!(nComp = *pchKeyword - *pchCandidate) // Be sure to match
&& *pchKeyword) // terminating 0's
{
pchKeyword++;
pchCandidate++;
}
// Matched?
if ( 0 == nComp )
return hashTblPtr->token;
}
if ( hashTblPtr->passBit==1 ) // passBit=>another entry to test
{
// step = 1+(hashKey%(HASHSIZE-1));// Calc 'step'
step = 1 + MOD257_1(hashKey);
// Get second entry to check.
index += step;
index = MOD257(index);
hashTblPtr = hashtbl[index];
while (hashTblPtr != NULL ) // While something there.
{
// Compare 2 C strings.
pchCandidate = (BYTE *)hashTblPtr->token->szKeyword;
if ( firstChar == *pchCandidate )
{
pchKeyword = (BYTE *)szKeyword;
while (!(nComp = *pchKeyword - *pchCandidate)
&& *pchKeyword)
{
pchKeyword++;
pchCandidate++;
}
// Matched?
if ( 0 == nComp )
return hashTblPtr->token;
}
if ( !hashTblPtr->passBit )// Done searching?
break;
// Get next entry.
index += step;
index = MOD257(index);
hashTblPtr = hashtbl[index];
}
}
}
return NULL;
}
/*
 *	HashKeyword_Init()
 *
 *	@func
 *		Load up and init the hash table with RTF control words.
 *	@devnote
 *		_rtfHashInited will be FALSE if anything here fails (i.e. the
 *		entry storage could not be allocated).
 *
 *		Calling this again after a successful init is a no-op.
 *		HashKeyword_Insert hands out 'storage' elements using a counter
 *		that is never reset, so re-running the init would leak the first
 *		allocation and index past the end of the new one.
 */
VOID HashKeyword_Init( )
{
	TRACEBEGIN(TRCSUBSYSDISP, TRCSCOPEINTERN, "HashKeyword_Init");

	extern SHORT cKeywords;			// How many RTF keywords we currently recognize.
	INT i;

	AssertSz( _rtfHashInited == FALSE, "Only need to init this once.");

	// The assert above vanishes in retail builds; guard for real.
	if ( _rtfHashInited )
		return;

	// Create enough storage for cKeywords; zero fill so that every
	// entry starts with a clear passBit.
	storage = (HashEntry *) PvAlloc( sizeof(HashEntry) * cKeywords, fZeroFill );

	// Load in all of the RTF control words.
	if ( storage )
	{
		// Must be set before inserting: HashKeyword_Insert asserts on it.
		_rtfHashInited = TRUE;

		for (i = 0; i < cKeywords; i++ )
		{
			HashKeyword_Insert(&rgKeyword[i]);
		}
#ifdef DEBUG						// Make sure we can fetch all these keywords.
		for (i = 0; i < cKeywords; i++ )
		{
			AssertSz ( &rgKeyword[i] == HashKeyword_Fetch ( rgKeyword[i].szKeyword ),
				"Keyword Hash is not working.");
		}
#endif
	}
}
#endif // RTF_HASHCACHE