525 lines
15 KiB
C++
525 lines
15 KiB
C++
//+-------------------------------------------------------------------------
|
|
//
|
|
// Microsoft Windows
|
|
// Copyright (C) Microsoft Corporation, 1994 - 2000.
|
|
//
|
|
// File: colhash.cxx
|
|
//
|
|
// Contents: Hash table compressions for large tables.
|
|
//
|
|
// Classes: CCompressedColHash
|
|
//
|
|
// Functions: GuidHash - Hash function for GUIDs
|
|
//
|
|
// History: 13 Apr 1994 AlanW Created
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
#include "pch.cxx"
|
|
#pragma hdrstop
|
|
|
|
#include <objcur.hxx>
|
|
#include <tblvarnt.hxx>
|
|
|
|
#include "tabledbg.hxx"
|
|
#include "colcompr.hxx"
|
|
|
|
|
|
// Upper bound on the number of hash buckets (_cHashEntries).  _NextHashSize
// clamps its result to this value, and both _AddData and _GrowHashTable stop
// asking for more buckets once _cHashEntries has reached it.
const USHORT MAX_HASH_TABLE_SIZE = 32767; // Maximum hash table size
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: GuidHash, public
|
|
//
|
|
// Synopsis: Hash a GUID value for use in a hash table.
|
|
//
|
|
// Arguments: [pbData] - pointer to the value to be hashed.
|
|
// [cbData] - should be sizeof (GUID), unused
|
|
//
|
|
// Returns: ULONG - Hash value for the input GUID
|
|
//
|
|
// Notes: The hash function just xors a few selected fields out
|
|
// of the GUID structure. It is intended to work well for
|
|
// both generated GUIDs (from UuidCreate) and administratively
|
|
// assigned GUIDs like OLE IIDs and CLSIDs.
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
ULONG GuidHash(
|
|
BYTE *pbData,
|
|
USHORT cbData
|
|
) {
|
|
UNALIGNED GUID *pGuid = (GUID *)pbData;
|
|
return (pGuid->Data1 ^
|
|
(pGuid->Data4[0]<<16) ^
|
|
(pGuid->Data4[6]<<8) ^
|
|
(pGuid->Data4[7]));
|
|
}
|
|
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Method: CCompressedColHash::DefaultHash, public static
|
|
//
|
|
// Synopsis: Generic hash function
|
|
//
|
|
// Arguments: [pbData] - pointer to the value to be hashed.
|
|
// [cbData] - size of pbData
|
|
//
|
|
// Returns: ULONG - Hash value for the input data
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
//static
|
|
ULONG CCompressedColHash::DefaultHash(
|
|
BYTE *pbData,
|
|
USHORT cbData
|
|
) {
|
|
ULONG ulRet = cbData;
|
|
|
|
while (cbData--)
|
|
ulRet = (ulRet<<1) ^ *pbData++;
|
|
|
|
return ulRet;
|
|
}
|
|
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Method: CCompressedColHash::CCompressedColHash, public
|
|
//
|
|
// Synopsis: Constructor for a hash compressed column.
|
|
//
|
|
// Arguments: [vtData] - type of each data item
|
|
// [cbDataWidth] - size of each data item
|
|
// [pfnHashFunction] - pointer to hash function
|
|
//
|
|
// Returns: pKey is filled in with the index of the data item in
|
|
// the data array.
|
|
//
|
|
// Notes:
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
|
|
CCompressedColHash::CCompressedColHash(
|
|
VARTYPE vtData,
|
|
USHORT cbDataWidth,
|
|
PFNHASH pfnHashFunction) :
|
|
CCompressedCol(
|
|
vtData, // DataType
|
|
sizeof (HASHKEY), // _cbKeyWidth
|
|
CCompressedCol::FixedHash // _CompressionType
|
|
),
|
|
|
|
_cbDataWidth(cbDataWidth),
|
|
_pfnHash(pfnHashFunction),
|
|
_pHashTable(NULL), _cHashEntries(0),
|
|
_pDataItems(NULL), _cDataItems(0),
|
|
_fGrowthInProgress(FALSE),
|
|
_pData(NULL), _cbData(0),
|
|
_ulMemCounter(0)
|
|
{
|
|
|
|
}
|
|
|
|
|
|
//
//  Destructor.  Returns the table buffer (if one was ever allocated) to
//  the table page allocator, then asserts that the memory counter has
//  dropped back to zero.
//
CCompressedColHash::~CCompressedColHash( )
{
    if (_pData != NULL) {
        TblPageDealloc(_pData, _ulMemCounter);

        _cbData = 0;
        _pData = NULL;
    }

    Win4Assert(_ulMemCounter == 0);
}
|
|
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Method: CCompressedColHash::AddData, public
|
|
//
|
|
// Synopsis: Add a data entry to the hash table if it is not
|
|
// already there.
|
|
//
|
|
// Arguments: [pVarnt] - pointer to data item
|
|
// [pKey] - pointer to lookup key value
|
|
// [reIndicator] - returns an indicator variable for
|
|
// problems
|
|
//
|
|
// Returns: pKey is filled in with the index of the data item in
|
|
// the data array. reIndicator is filled with an indication
|
|
// of problems.
|
|
//
|
|
// Notes:
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
VOID CCompressedColHash::AddData(
|
|
PROPVARIANT const * const pVarnt,
|
|
ULONG* pKey,
|
|
GetValueResult& reIndicator
|
|
) {
|
|
//
|
|
// Specially handle the VT_EMPTY case
|
|
//
|
|
if (pVarnt->vt == VT_EMPTY) {
|
|
*pKey = 0;
|
|
reIndicator = GVRSuccess;
|
|
return;
|
|
}
|
|
|
|
CTableVariant *pVar = (CTableVariant *)pVarnt;
|
|
Win4Assert(pVarnt->vt == DataType);
|
|
|
|
BYTE *pbData ;
|
|
USHORT cbData = (USHORT) pVar->VarDataSize();
|
|
|
|
Win4Assert(cbData && cbData == _cbDataWidth);
|
|
if (pVar->VariantPointerInFirstWord( )) {
|
|
pbData = (BYTE *) pVar->pszVal;
|
|
} else {
|
|
Win4Assert(pVar->VariantPointerInSecondWord( ));
|
|
pbData = (BYTE *) pVar->blob.pBlobData;
|
|
}
|
|
|
|
_AddData(pbData, cbData, pKey);
|
|
reIndicator = GVRSuccess;
|
|
return;
|
|
}
|
|
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Method: CCompressedColHash::_AddData, protected
|
|
//
|
|
// Synopsis: Helper for the public AddData method. Adds
|
|
// a data entry to the hash table (if it does not already
|
|
// exist).
|
|
//
|
|
// Arguments: [pbData] - pointer to data item
|
|
// [cbDataSize] - size of data item
|
|
// [pKey] - pointer to lookup key value
|
|
//
|
|
// Returns: pKey is filled in with the index of the data item in
|
|
// the data array.
|
|
//
|
|
// Notes:
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
VOID CCompressedColHash::_AddData(
|
|
BYTE *pbData,
|
|
USHORT cbDataSize,
|
|
ULONG* pKey
|
|
) {
|
|
Win4Assert(cbDataSize == _cbDataWidth);
|
|
|
|
if (_pData == NULL) {
|
|
_GrowHashTable();
|
|
}
|
|
|
|
ULONG ulHash = _pfnHash(pbData, cbDataSize);
|
|
|
|
ulHash %= _cHashEntries;
|
|
|
|
HASHKEY* pusHashChain = &_pHashTable[ulHash];
|
|
HASHKEY* pusNextData;
|
|
USHORT cChainLength = 0;
|
|
|
|
while (*pusHashChain != 0) {
|
|
cChainLength++;
|
|
pusNextData = _IndexHashkey( *pusHashChain );
|
|
|
|
if (memcmp((BYTE *) (pusNextData+1), pbData, cbDataSize) == 0) {
|
|
//
|
|
// Found the data item. Return its index.
|
|
//
|
|
*pKey = *pusHashChain;
|
|
return;
|
|
}
|
|
pusHashChain = pusNextData;
|
|
}
|
|
if (cChainLength > _maxChain)
|
|
_maxChain = cChainLength;
|
|
|
|
pusNextData = (HASHKEY *) ((BYTE *)_pDataItems +
|
|
(_cDataItems) * (sizeof (HASHKEY) + _cbDataWidth));
|
|
if (((BYTE*)pusNextData + (sizeof (HASHKEY) + _cbDataWidth) -
|
|
(BYTE *)_pData) > (int) _cbData ||
|
|
(_cDataItems > (ULONG) ( _cHashEntries * 3 ) &&
|
|
_cHashEntries < MAX_HASH_TABLE_SIZE &&
|
|
!_fGrowthInProgress)) {
|
|
|
|
//
|
|
// The new data will not fit in the table, or the hash chains will
|
|
// be too long. Grow the table, then recurse. The table may be
|
|
// rehashed, and can be moved when grown, so the lookup we've
|
|
// already done may be invalid.
|
|
//
|
|
_GrowHashTable();
|
|
_AddData(pbData, cbDataSize, pKey);
|
|
return;
|
|
}
|
|
|
|
//
|
|
// Now add the new data item. The data item consists of a USHORT
|
|
// for the hash chain, followed by the buffer for the fixed size
|
|
// data item.
|
|
//
|
|
|
|
*pKey = *pusHashChain = ++_cDataItems;
|
|
Win4Assert(_cDataItems != 0); // check for overflow
|
|
*pusNextData++ = 0;
|
|
RtlCopyMemory((BYTE *)pusNextData, pbData, _cbDataWidth);
|
|
}
|
|
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Method: CCompressedColHash::_Rehash, protected
|
|
//
|
|
// Synopsis: Helper function for the _GrowHashTable method.
|
|
// reinserts an existing item into the hash table.
|
|
//
|
|
// Arguments: [pbData] - pointer to data item
|
|
// [kData] - index to the data item in the table
|
|
//
|
|
// Returns: Nothing
|
|
//
|
|
// Notes:
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
VOID CCompressedColHash::_Rehash(
|
|
HASHKEY kData,
|
|
BYTE *pbData
|
|
) {
|
|
Win4Assert(_pData != NULL && kData > 0 && kData <= _cDataItems);
|
|
|
|
ULONG ulHash = _pfnHash(pbData, _cbDataWidth);
|
|
|
|
ulHash %= _cHashEntries;
|
|
|
|
HASHKEY* pusHashChain = &_pHashTable[ulHash];
|
|
HASHKEY* pusNextData;
|
|
USHORT cChainLength = 0;
|
|
|
|
while (*pusHashChain != 0) {
|
|
cChainLength++;
|
|
pusNextData = _IndexHashkey( *pusHashChain );
|
|
pusHashChain = pusNextData;
|
|
}
|
|
if (cChainLength > _maxChain)
|
|
_maxChain = cChainLength;
|
|
|
|
pusNextData = _IndexHashkey( kData );
|
|
|
|
//
|
|
// Now add the data item to the hash chain.
|
|
//
|
|
|
|
*pusHashChain = kData;
|
|
*pusNextData++ = 0;
|
|
Win4Assert((BYTE*)pusNextData == pbData);
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Method: CCompressedColHash::GetData, public
|
|
//
|
|
// Synopsis: Retrieve a value from the hash table.
|
|
//
|
|
// Arguments: [pVarnt] - pointer to variant in which to return the data
|
|
// [PreferredType] - Peferred data type
|
|
// [ulKey] - the lookup key value
|
|
// [PropId] - (unused) property id being retrieved.
|
|
//
|
|
// Returns: pVarnt is filled with the result of the lookup.
|
|
//
|
|
// Notes: The PreferredType expresses the caller's preference only.
|
|
// This method is free to return whatever type is most
|
|
// convenient.
|
|
//
|
|
// The returned data does not conform to any alignment
|
|
// restrictions on the data.
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
GetValueResult CCompressedColHash::GetData(
|
|
PROPVARIANT * pVarnt,
|
|
VARTYPE PreferredType,
|
|
ULONG ulKey,
|
|
PROPID PropId
|
|
) {
|
|
CTableVariant *pVar = (CTableVariant *)pVarnt;
|
|
Win4Assert(PreferredType == DataType && ulKey >= 1 && ulKey <= _cDataItems);
|
|
|
|
if (ulKey >= 1 && ulKey <= _cDataItems) {
|
|
pVarnt->vt = DataType;
|
|
|
|
BYTE *pbData = ((BYTE *)_pDataItems +
|
|
(ulKey-1) * (sizeof (HASHKEY) + _cbDataWidth)) +
|
|
sizeof (HASHKEY);
|
|
|
|
if (pVar->VariantPointerInFirstWord( )) {
|
|
pVar->pszVal = (CHAR*)pbData;
|
|
} else {
|
|
Win4Assert(pVar->VariantPointerInSecondWord( ));
|
|
pVar->blob.pBlobData = pbData;
|
|
}
|
|
return GVRSuccess;
|
|
} else {
|
|
pVarnt->vt = VT_EMPTY;
|
|
return GVRNotAvailable;
|
|
}
|
|
}
|
|
|
|
//
//  FreeVariant - release resources held by a variant filled in by GetData.
//  GetData only points the variant at bytes inside the table's own buffer,
//  so there is nothing to release here.
//
void CCompressedColHash::FreeVariant(PROPVARIANT * pvarnt)
{
}
|
|
|
|
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Method: CCompressedColHash::_GrowHashTable, protected
|
|
//
|
|
// Synopsis: Grow the space allocated to the hash table and data
|
|
// items.
|
|
//
|
|
// Arguments: - none -
|
|
//
|
|
// Returns: Nothing
|
|
//
|
|
// Notes: Also called to allocate the initial data area.
|
|
//
|
|
// The number of hash buckets starts out at a low
|
|
// number, then is increased as the amount of data
|
|
// grows. Data items must be rehashed when this occurs.
|
|
// Since items are identified by their offset in the
|
|
// data array, this must not change while rehashing.
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
// Number of hash buckets _GrowHashTable uses for the very first allocation
// (when _pData is still NULL).
const unsigned MIN_HASH_TABLE_SIZE = 11; // Minimum hash table size
|
|
|
|
|
|
//
//  _NextHashSize - pick the bucket count to use when the table is rehashed.
//
//  Arguments:  [cItems] - number of data items the table has to hold
//              [cHash]  - current number of hash buckets
//
//  Returns:    cHash grown by repeated 2*n+1 steps (always at least one
//              step) until it is no smaller than cItems, clamped to
//              MAX_HASH_TABLE_SIZE.
//
//  Notes:      The arithmetic is done in a ULONG and the loop stops as
//              soon as the clamp is reached.  Doing it in a USHORT lets
//              the value wrap, and for item counts above 65535 the loop
//              condition could then never become false.
//
inline USHORT CCompressedColHash::_NextHashSize(
    HASHKEY cItems,
    USHORT  cHash
) {
    ULONG cNext = cHash;

    do {
        cNext = cNext*2 + 1;
    } while (cNext < cItems && cNext < MAX_HASH_TABLE_SIZE);

    return (USHORT) ((cNext < MAX_HASH_TABLE_SIZE) ? cNext
                                                   : MAX_HASH_TABLE_SIZE);
}
|
|
|
|
|
|
VOID CCompressedColHash::_GrowHashTable( void )
{
    ULONG cbSize;

    //
    //  Number of buckets the buffer will hold once this call completes.
    //  It starts out as the current count so that the sizing below is
    //  also correct when the buffer grows without a rehash.
    //
    USHORT cNewHashEntries = (USHORT) _cHashEntries;
    int fRehash = FALSE;

    Win4Assert(!_fGrowthInProgress &&
               "Recursive call to CCompressedColHash::_GrowHashTable");

    _fGrowthInProgress = TRUE;
    if (_pData == NULL) {
        //
        //  First allocation: start with the minimum number of buckets.
        //
        cNewHashEntries = MIN_HASH_TABLE_SIZE;
    } else if (_cHashEntries < MAX_HASH_TABLE_SIZE &&
               (_cDataItems > (ULONG) _cHashEntries*2 ||
                (_cDataItems > _cHashEntries && _maxChain > 3))) {
        //
        //  The table is getting crowded or some chain has become long;
        //  move to a larger number of buckets and rehash.
        //
        cNewHashEntries = _NextHashSize(_cDataItems, _cHashEntries);
        fRehash = TRUE;
        tbDebugOut((DEB_ITRACE, "Growing hash table, old,new sizes = %d,%d\n",
                    _cHashEntries, cNewHashEntries));
    }

    //
    //  Compute the required size of the hash table and data.  The bucket
    //  array must be sized from the count the buffer is about to hold,
    //  not from the count it holds now: on the first allocation the
    //  current count is zero, and on a rehash it is smaller than the new
    //  one, so the data items moved in behind the bucket array could
    //  otherwise end up past the end of the allocation.
    //
    cbSize = cNewHashEntries * sizeof(HASHKEY);
    cbSize += (_cDataItems + 4) * (_cbDataWidth + sizeof (HASHKEY));
    cbSize = TblPageGrowSize(cbSize, TRUE);
    Win4Assert(cbSize > _cbData || (fRehash && cbSize == _cbData));

    BYTE *pbNewData;

    if (_pData && cbSize < TBL_PAGE_MAX_SEGMENT_SIZE) {
        pbNewData = (BYTE *)
            TblPageRealloc(_pData, _ulMemCounter, cbSize, 0);
    } else {
        pbNewData =
            (BYTE *)TblPageAlloc(cbSize, _ulMemCounter, TBL_SIG_COMPRESSED);
    }

    tbDebugOut((DEB_ITRACE, "New hash table at = %x\n", pbNewData));

    if (_pData != NULL && !fRehash) {
        //
        //  Same number of buckets, just more room for data.  The layout
        //  inside the buffer does not change, so a straight copy is
        //  enough if the buffer moved.
        //
        if (_pData != pbNewData) {
            RtlCopyMemory(pbNewData, _pData, _cbData);
            TblPageDealloc(_pData, _ulMemCounter, _cbData);
            _pData = pbNewData;
        }
        _cbData = cbSize;
        _pHashTable = (HASHKEY *) _pData;
        _pDataItems = (BYTE *) (_pHashTable + _cHashEntries);
    } else {
        //
        //  Initial allocation, or the number of buckets is changing.
        //
        BYTE *pOldDataItems = _pDataItems;
        VOID *pOldData = _pData;
        ULONG cbOldSize = _cbData;

        _pData = pbNewData;
        _cbData = cbSize;
        _pHashTable = (HASHKEY *)_pData;
        _cHashEntries = cNewHashEntries;
        _pDataItems = (BYTE *) (_pHashTable + _cHashEntries);

        //
        //  Slide the data items up behind the enlarged bucket array.  The
        //  source and destination can overlap when the buffer was grown
        //  in place, hence RtlMoveMemory.  The items keep their order, so
        //  their indexes (the keys handed out to callers) stay valid.
        //
        if (pOldData != NULL)
            RtlMoveMemory(_pDataItems,
                          pOldDataItems,
                          _cDataItems * (sizeof (HASHKEY) + _cbDataWidth));
        RtlZeroMemory(_pHashTable, cNewHashEntries * sizeof (HASHKEY));
        _maxChain = 0;

        //
        //  Now re-add all old data items to the hash table.
        //
        pOldDataItems = _pDataItems;
        for (HASHKEY i=1; i<=_cDataItems; i++) {
            pOldDataItems += sizeof (HASHKEY);      // skip hash chain
            _Rehash(i, pOldDataItems);
            pOldDataItems += _cbDataWidth;          // skip data item
        }
        if (pOldData != NULL && pOldData != _pData)
            TblPageDealloc(pOldData, _ulMemCounter, cbOldSize);
    }

    _fGrowthInProgress = FALSE;
    return;
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: _ClearAll
|
|
//
|
|
// Synopsis: Method clears all the data in the "fixed width" part of the
|
|
// memory buffer.
|
|
//
|
|
// Arguments: (none)
|
|
//
|
|
// History: 12-16-94 srikants Created
|
|
//
|
|
// Notes:
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
void CCompressedColHash::_ClearAll()
|
|
{
|
|
RtlZeroMemory(_pHashTable, _cHashEntries * sizeof (HASHKEY));
|
|
RtlZeroMemory(_pDataItems, _cDataItems * _cbDataWidth );
|
|
_cDataItems = 0;
|
|
}
|