//+------------------------------------------------------------------------- // // Microsoft Windows // Copyright (C) Microsoft Corporation, 1994 - 2000. // // File: colhash.cxx // // Contents: Hash table compressions for large tables. // // Classes: CCompressedColHash // // Functions: GuidHash - Hash function for GUIDs // // History: 13 Apr 1994 AlanW Created // //-------------------------------------------------------------------------- #include "pch.cxx" #pragma hdrstop #include #include #include "tabledbg.hxx" #include "colcompr.hxx" const USHORT MAX_HASH_TABLE_SIZE = 32767; // Maximum hash table size //+------------------------------------------------------------------------- // // Function: GuidHash, public // // Synopsis: Hash a GUID value for use in a hash table. // // Arguments: [pbData] - pointer to the value to be hashed. // [cbData] - should be sizeof (GUID), unused // // Returns: ULONG - Hash value for the input GUID // // Notes: The hash function just xors a few selected fields out // of the GUID structure. It is intended to work well for // both generated GUIDs (from UuidCreate) and administratively // assigned GUIDs like OLE IIDs and CLSIDs. // //-------------------------------------------------------------------------- ULONG GuidHash( BYTE *pbData, USHORT cbData ) { UNALIGNED GUID *pGuid = (GUID *)pbData; return (pGuid->Data1 ^ (pGuid->Data4[0]<<16) ^ (pGuid->Data4[6]<<8) ^ (pGuid->Data4[7])); } //+------------------------------------------------------------------------- // // Method: CCompressedColHash::DefaultHash, public static // // Synopsis: Generic hash function // // Arguments: [pbData] - pointer to the value to be hashed. // [cbData] - size of pbData // // Returns: ULONG - Hash value for the input data // //-------------------------------------------------------------------------- //static ULONG CCompressedColHash::DefaultHash( BYTE *pbData, USHORT cbData ) { ULONG ulRet = cbData; while (cbData--) ulRet = (ulRet<<1) ^ *pbData++; return ulRet; } //+------------------------------------------------------------------------- // // Method: CCompressedColHash::CCompressedColHash, public // // Synopsis: Constructor for a hash compressed column. // // Arguments: [vtData] - type of each data item // [cbDataWidth] - size of each data item // [pfnHashFunction] - pointer to hash function // // Returns: pKey is filled in with the index of the data item in // the data array. // // Notes: // //-------------------------------------------------------------------------- CCompressedColHash::CCompressedColHash( VARTYPE vtData, USHORT cbDataWidth, PFNHASH pfnHashFunction) : CCompressedCol( vtData, // DataType sizeof (HASHKEY), // _cbKeyWidth CCompressedCol::FixedHash // _CompressionType ), _cbDataWidth(cbDataWidth), _pfnHash(pfnHashFunction), _pHashTable(NULL), _cHashEntries(0), _pDataItems(NULL), _cDataItems(0), _fGrowthInProgress(FALSE), _pData(NULL), _cbData(0), _ulMemCounter(0) { } CCompressedColHash::~CCompressedColHash( ) { if (_pData) { TblPageDealloc(_pData, _ulMemCounter); _pData = NULL; _cbData = 0; } Win4Assert(_ulMemCounter == 0); } //+------------------------------------------------------------------------- // // Method: CCompressedColHash::AddData, public // // Synopsis: Add a data entry to the hash table if it is not // already there. // // Arguments: [pVarnt] - pointer to data item // [pKey] - pointer to lookup key value // [reIndicator] - returns an indicator variable for // problems // // Returns: pKey is filled in with the index of the data item in // the data array. reIndicator is filled with an indication // of problems. // // Notes: // //-------------------------------------------------------------------------- VOID CCompressedColHash::AddData( PROPVARIANT const * const pVarnt, ULONG* pKey, GetValueResult& reIndicator ) { // // Specially handle the VT_EMPTY case // if (pVarnt->vt == VT_EMPTY) { *pKey = 0; reIndicator = GVRSuccess; return; } CTableVariant *pVar = (CTableVariant *)pVarnt; Win4Assert(pVarnt->vt == DataType); BYTE *pbData ; USHORT cbData = (USHORT) pVar->VarDataSize(); Win4Assert(cbData && cbData == _cbDataWidth); if (pVar->VariantPointerInFirstWord( )) { pbData = (BYTE *) pVar->pszVal; } else { Win4Assert(pVar->VariantPointerInSecondWord( )); pbData = (BYTE *) pVar->blob.pBlobData; } _AddData(pbData, cbData, pKey); reIndicator = GVRSuccess; return; } //+------------------------------------------------------------------------- // // Method: CCompressedColHash::_AddData, protected // // Synopsis: Helper for the public AddData method. Adds // a data entry to the hash table (if it does not already // exist). // // Arguments: [pbData] - pointer to data item // [cbDataSize] - size of data item // [pKey] - pointer to lookup key value // // Returns: pKey is filled in with the index of the data item in // the data array. // // Notes: // //-------------------------------------------------------------------------- VOID CCompressedColHash::_AddData( BYTE *pbData, USHORT cbDataSize, ULONG* pKey ) { Win4Assert(cbDataSize == _cbDataWidth); if (_pData == NULL) { _GrowHashTable(); } ULONG ulHash = _pfnHash(pbData, cbDataSize); ulHash %= _cHashEntries; HASHKEY* pusHashChain = &_pHashTable[ulHash]; HASHKEY* pusNextData; USHORT cChainLength = 0; while (*pusHashChain != 0) { cChainLength++; pusNextData = _IndexHashkey( *pusHashChain ); if (memcmp((BYTE *) (pusNextData+1), pbData, cbDataSize) == 0) { // // Found the data item. Return its index. // *pKey = *pusHashChain; return; } pusHashChain = pusNextData; } if (cChainLength > _maxChain) _maxChain = cChainLength; pusNextData = (HASHKEY *) ((BYTE *)_pDataItems + (_cDataItems) * (sizeof (HASHKEY) + _cbDataWidth)); if (((BYTE*)pusNextData + (sizeof (HASHKEY) + _cbDataWidth) - (BYTE *)_pData) > (int) _cbData || (_cDataItems > (ULONG) ( _cHashEntries * 3 ) && _cHashEntries < MAX_HASH_TABLE_SIZE && !_fGrowthInProgress)) { // // The new data will not fit in the table, or the hash chains will // be too long. Grow the table, then recurse. The table may be // rehashed, and can be moved when grown, so the lookup we've // already done may be invalid. // _GrowHashTable(); _AddData(pbData, cbDataSize, pKey); return; } // // Now add the new data item. The data item consists of a USHORT // for the hash chain, followed by the buffer for the fixed size // data item. // *pKey = *pusHashChain = ++_cDataItems; Win4Assert(_cDataItems != 0); // check for overflow *pusNextData++ = 0; RtlCopyMemory((BYTE *)pusNextData, pbData, _cbDataWidth); } //+------------------------------------------------------------------------- // // Method: CCompressedColHash::_Rehash, protected // // Synopsis: Helper function for the _GrowHashTable method. // reinserts an existing item into the hash table. // // Arguments: [pbData] - pointer to data item // [kData] - index to the data item in the table // // Returns: Nothing // // Notes: // //-------------------------------------------------------------------------- VOID CCompressedColHash::_Rehash( HASHKEY kData, BYTE *pbData ) { Win4Assert(_pData != NULL && kData > 0 && kData <= _cDataItems); ULONG ulHash = _pfnHash(pbData, _cbDataWidth); ulHash %= _cHashEntries; HASHKEY* pusHashChain = &_pHashTable[ulHash]; HASHKEY* pusNextData; USHORT cChainLength = 0; while (*pusHashChain != 0) { cChainLength++; pusNextData = _IndexHashkey( *pusHashChain ); pusHashChain = pusNextData; } if (cChainLength > _maxChain) _maxChain = cChainLength; pusNextData = _IndexHashkey( kData ); // // Now add the data item to the hash chain. // *pusHashChain = kData; *pusNextData++ = 0; Win4Assert((BYTE*)pusNextData == pbData); return; } //+------------------------------------------------------------------------- // // Method: CCompressedColHash::GetData, public // // Synopsis: Retrieve a value from the hash table. // // Arguments: [pVarnt] - pointer to variant in which to return the data // [PreferredType] - Peferred data type // [ulKey] - the lookup key value // [PropId] - (unused) property id being retrieved. // // Returns: pVarnt is filled with the result of the lookup. // // Notes: The PreferredType expresses the caller's preference only. // This method is free to return whatever type is most // convenient. // // The returned data does not conform to any alignment // restrictions on the data. // //-------------------------------------------------------------------------- GetValueResult CCompressedColHash::GetData( PROPVARIANT * pVarnt, VARTYPE PreferredType, ULONG ulKey, PROPID PropId ) { CTableVariant *pVar = (CTableVariant *)pVarnt; Win4Assert(PreferredType == DataType && ulKey >= 1 && ulKey <= _cDataItems); if (ulKey >= 1 && ulKey <= _cDataItems) { pVarnt->vt = DataType; BYTE *pbData = ((BYTE *)_pDataItems + (ulKey-1) * (sizeof (HASHKEY) + _cbDataWidth)) + sizeof (HASHKEY); if (pVar->VariantPointerInFirstWord( )) { pVar->pszVal = (CHAR*)pbData; } else { Win4Assert(pVar->VariantPointerInSecondWord( )); pVar->blob.pBlobData = pbData; } return GVRSuccess; } else { pVarnt->vt = VT_EMPTY; return GVRNotAvailable; } } void CCompressedColHash::FreeVariant(PROPVARIANT * pvarnt) { } //+------------------------------------------------------------------------- // // Method: CCompressedColHash::_GrowHashTable, protected // // Synopsis: Grow the space allocated to the hash table and data // items. // // Arguments: - none - // // Returns: Nothing // // Notes: Also called to allocate the initial data area. // // The number of hash buckets starts out at a low // number, then is increased as the amount of data // grows. Data items must be rehashed when this occurs. // Since items are identified by their offset in the // data array, this must not change while rehashing. // //-------------------------------------------------------------------------- const unsigned MIN_HASH_TABLE_SIZE = 11; // Minimum hash table size inline USHORT CCompressedColHash::_NextHashSize( HASHKEY cItems, USHORT cHash ) { do { cHash = cHash*2 + 1; } while (cHash < _cDataItems); return (cHash < MAX_HASH_TABLE_SIZE) ? cHash : MAX_HASH_TABLE_SIZE; } VOID CCompressedColHash::_GrowHashTable( void ) { ULONG cbSize; USHORT cNewHashEntries; int fRehash = FALSE; Win4Assert(!_fGrowthInProgress && "Recursive call to CCompressedColHash::_GrowHashTable"); _fGrowthInProgress = TRUE; if (_pData == NULL) { cNewHashEntries = MIN_HASH_TABLE_SIZE; } else if (_cHashEntries < MAX_HASH_TABLE_SIZE && (_cDataItems > (ULONG) _cHashEntries*2 || (_cDataItems > _cHashEntries && _maxChain > 3))) { cNewHashEntries = _NextHashSize(_cDataItems, _cHashEntries); fRehash = TRUE; tbDebugOut((DEB_ITRACE, "Growing hash table, old,new sizes = %d,%d\n", _cHashEntries, cNewHashEntries)); } // // Compute the required size of the hash table and data // cbSize = _cHashEntries * sizeof(HASHKEY); cbSize += (_cDataItems + 4) * (_cbDataWidth + sizeof (HASHKEY)); cbSize = TblPageGrowSize(cbSize, TRUE); Win4Assert(cbSize > _cbData || (fRehash && cbSize == _cbData)); BYTE *pbNewData; if (_pData && cbSize < TBL_PAGE_MAX_SEGMENT_SIZE) { pbNewData = (BYTE *) TblPageRealloc(_pData, _ulMemCounter, cbSize, 0); } else { pbNewData = (BYTE *)TblPageAlloc(cbSize, _ulMemCounter, TBL_SIG_COMPRESSED); } tbDebugOut((DEB_ITRACE, "New hash table at = %x\n", pbNewData)); if (_pData != NULL && !fRehash) { if (_pData != pbNewData) { RtlCopyMemory(pbNewData, _pData, _cbData); TblPageDealloc(_pData, _ulMemCounter, _cbData); _pData = pbNewData; } _cbData = cbSize; _pHashTable = (HASHKEY *) _pData; _pDataItems = (BYTE *) (_pHashTable + _cHashEntries); } else { BYTE *pOldDataItems = _pDataItems; VOID *pOldData = _pData; ULONG cbOldSize = _cbData; _pData = pbNewData; _cbData = cbSize; _pHashTable = (HASHKEY *)_pData; _cHashEntries = cNewHashEntries; _pDataItems = (BYTE *) (_pHashTable + _cHashEntries); if (pOldData != NULL) RtlMoveMemory(_pDataItems, pOldDataItems, _cDataItems * (sizeof (HASHKEY) + _cbDataWidth)); RtlZeroMemory(_pHashTable, cNewHashEntries * sizeof (HASHKEY)); _maxChain = 0; // // Now re-add all old data items to the hash table. // pOldDataItems = _pDataItems; for (HASHKEY i=1; i<=_cDataItems; i++) { pOldDataItems += sizeof (HASHKEY); // skip hash chain _Rehash(i, pOldDataItems); pOldDataItems += _cbDataWidth; // skip data item } if (pOldData != NULL && pOldData != _pData) TblPageDealloc(pOldData, _ulMemCounter, cbOldSize); } _fGrowthInProgress = FALSE; return; } //+--------------------------------------------------------------------------- // // Function: _ClearAll // // Synopsis: Method clears all the data in the "fixed width" part of the // memory buffer. // // Arguments: (none) // // History: 12-16-94 srikants Created // // Notes: // //---------------------------------------------------------------------------- void CCompressedColHash::_ClearAll() { RtlZeroMemory(_pHashTable, _cHashEntries * sizeof (HASHKEY)); RtlZeroMemory(_pDataItems, _cDataItems * _cbDataWidth ); _cDataItems = 0; }