windows-nt/Source/XPSP1/NT/inetsrv/query/bigtable/colhash.cxx

525 lines
15 KiB
C++
Raw Normal View History

2020-09-26 03:20:57 -05:00
//+-------------------------------------------------------------------------
//
// Microsoft Windows
// Copyright (C) Microsoft Corporation, 1994 - 2000.
//
// File: colhash.cxx
//
// Contents: Hash table compressions for large tables.
//
// Classes: CCompressedColHash
//
// Functions: GuidHash - Hash function for GUIDs
//
// History: 13 Apr 1994 AlanW Created
//
//--------------------------------------------------------------------------
#include "pch.cxx"
#pragma hdrstop
#include <objcur.hxx>
#include <tblvarnt.hxx>
#include "tabledbg.hxx"
#include "colcompr.hxx"
//
//  Upper bound on the number of hash buckets.  _NextHashSize clamps its
//  result to this value, and the table is no longer rehashed to a larger
//  bucket count once _cHashEntries has reached it.
//
const USHORT MAX_HASH_TABLE_SIZE = 32767; // Maximum hash table size
//+-------------------------------------------------------------------------
//
// Function: GuidHash, public
//
// Synopsis: Hash a GUID value for use in a hash table.
//
// Arguments: [pbData] - pointer to the value to be hashed.
// [cbData] - should be sizeof (GUID), unused
//
// Returns: ULONG - Hash value for the input GUID
//
// Notes: The hash function just xors a few selected fields out
// of the GUID structure. It is intended to work well for
// both generated GUIDs (from UuidCreate) and administratively
// assigned GUIDs like OLE IIDs and CLSIDs.
//
//--------------------------------------------------------------------------
ULONG GuidHash(
    BYTE *pbData,
    USHORT cbData
) {
    //
    //  The input need not be aligned, so it is viewed through an
    //  UNALIGNED pointer.  [cbData] is not consulted.
    //
    UNALIGNED GUID *pId = (GUID *)pbData;

    //
    //  Start from Data1 and fold in three bytes of Data4, each shifted
    //  to a different byte position of the result.
    //
    ULONG ulHash = pId->Data1;
    ulHash ^= (pId->Data4[0] << 16);
    ulHash ^= (pId->Data4[6] << 8);
    ulHash ^= (pId->Data4[7]);

    return ulHash;
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHash::DefaultHash, public static
//
// Synopsis: Generic hash function
//
// Arguments: [pbData] - pointer to the value to be hashed.
// [cbData] - size of pbData
//
// Returns: ULONG - Hash value for the input data
//
//--------------------------------------------------------------------------
//static
ULONG CCompressedColHash::DefaultHash(
    BYTE *pbData,
    USHORT cbData
) {
    //
    //  Seed the hash with the length, then for each byte in order shift
    //  the running value left by one bit and xor the byte in.
    //
    ULONG ulHash = cbData;

    for (USHORT ib = 0; ib < cbData; ib++) {
        ulHash = (ulHash << 1) ^ pbData[ib];
    }

    return ulHash;
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHash::CCompressedColHash, public
//
// Synopsis: Constructor for a hash compressed column.
//
// Arguments: [vtData] - type of each data item
// [cbDataWidth] - size of each data item
// [pfnHashFunction] - pointer to hash function
//
// Returns: Nothing
//
// Notes:
//
//--------------------------------------------------------------------------
CCompressedColHash::CCompressedColHash(
    VARTYPE vtData,
    USHORT cbDataWidth,
    PFNHASH pfnHashFunction) :
    CCompressedCol(
        vtData,                     // DataType
        sizeof (HASHKEY),           // _cbKeyWidth
        CCompressedCol::FixedHash   // _CompressionType
    ),
    _cbDataWidth(cbDataWidth),
    _pfnHash(pfnHashFunction),
    _pHashTable(NULL), _cHashEntries(0),
    _pDataItems(NULL), _cDataItems(0),
    _fGrowthInProgress(FALSE),
    _pData(NULL), _cbData(0),
    _ulMemCounter(0)
{
    //
    //  No storage is allocated here; the hash table and data area are
    //  created by _GrowHashTable the first time _AddData runs with
    //  _pData == NULL.
    //
    //  Give the chain-length statistic a defined value so that it is
    //  never read while indeterminate.  It is assigned in the body rather
    //  than in the initializer list so that this does not depend on the
    //  member declaration order.
    //
    _maxChain = 0;
}
CCompressedColHash::~CCompressedColHash( )
{
    //
    //  Release the single buffer holding the hash table and data items,
    //  if one was ever allocated.
    //
    if (NULL != _pData) {
        TblPageDealloc(_pData, _ulMemCounter);
        _cbData = 0;
        _pData = NULL;
    }

    // All memory charged to this object should now be accounted for.
    Win4Assert(0 == _ulMemCounter);
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHash::AddData, public
//
// Synopsis: Add a data entry to the hash table if it is not
// already there.
//
// Arguments: [pVarnt] - pointer to data item
// [pKey] - pointer to lookup key value
// [reIndicator] - returns an indicator variable for
// problems
//
// Returns: pKey is filled in with the index of the data item in
// the data array. reIndicator is filled with an indication
// of problems.
//
// Notes:
//
//--------------------------------------------------------------------------
VOID CCompressedColHash::AddData(
    PROPVARIANT const * const pVarnt,
    ULONG* pKey,
    GetValueResult& reIndicator
) {
    //
    //  VT_EMPTY is never stored in the table; it is reported as key 0.
    //
    if (VT_EMPTY == pVarnt->vt) {
        *pKey = 0;
        reIndicator = GVRSuccess;
        return;
    }

    Win4Assert(pVarnt->vt == DataType);

    CTableVariant *pTblVar = (CTableVariant *)pVarnt;

    //
    //  Only fixed-width values of exactly _cbDataWidth bytes are expected.
    //
    const USHORT cbValue = (USHORT) pTblVar->VarDataSize();
    Win4Assert(cbValue && cbValue == _cbDataWidth);

    //
    //  Locate the bytes to be hashed: the variant carries the pointer
    //  either in its first word or in its second word.
    //
    const BOOL fInFirstWord = pTblVar->VariantPointerInFirstWord( );
    Win4Assert(fInFirstWord || pTblVar->VariantPointerInSecondWord( ));

    BYTE *pbValue = fInFirstWord ? (BYTE *) pTblVar->pszVal
                                 : (BYTE *) pTblVar->blob.pBlobData;

    _AddData(pbValue, cbValue, pKey);
    reIndicator = GVRSuccess;
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHash::_AddData, protected
//
// Synopsis: Helper for the public AddData method. Adds
// a data entry to the hash table (if it does not already
// exist).
//
// Arguments: [pbData] - pointer to data item
// [cbDataSize] - size of data item
// [pKey] - pointer to lookup key value
//
// Returns: pKey is filled in with the index of the data item in
// the data array.
//
// Notes:
//
//--------------------------------------------------------------------------
VOID CCompressedColHash::_AddData(
    BYTE *pbData,
    USHORT cbDataSize,
    ULONG* pKey
) {
    Win4Assert(cbDataSize == _cbDataWidth);

    //
    //  First use: allocate the initial hash table and data area.
    //
    if (NULL == _pData)
        _GrowHashTable();

    //
    //  Pick the bucket and walk its chain looking for an identical value.
    //  pLink always points at the HASHKEY that names the next entry (a
    //  bucket head at first, then the link field of each entry), so when
    //  the walk ends it is the place where a new entry gets attached.
    //
    ULONG iBucket = _pfnHash(pbData, cbDataSize);
    iBucket %= _cHashEntries;

    HASHKEY* pLink = &_pHashTable[iBucket];
    USHORT cVisited = 0;

    for ( ; *pLink != 0; cVisited++) {
        HASHKEY* pEntry = _IndexHashkey( *pLink );

        if (0 == memcmp((BYTE *) (pEntry+1), pbData, cbDataSize)) {
            //
            //  Already present.  Return the existing key.
            //
            *pKey = *pLink;
            return;
        }
        pLink = pEntry;
    }

    if (_maxChain < cVisited)
        _maxChain = cVisited;

    //
    //  Not found.  Work out where the next entry would be placed and
    //  whether the table has to grow first.
    //
    const size_t cbEntry = sizeof (HASHKEY) + _cbDataWidth;
    HASHKEY* pSlot = (HASHKEY *) ((BYTE *)_pDataItems +
                                  (_cDataItems) * cbEntry);

    const BOOL fNoRoom =
        ((BYTE*)pSlot + cbEntry - (BYTE *)_pData) > (int) _cbData;

    const BOOL fChainsTooLong =
        _cDataItems > (ULONG) ( _cHashEntries * 3 ) &&
        _cHashEntries < MAX_HASH_TABLE_SIZE &&
        !_fGrowthInProgress;

    if (fNoRoom || fChainsTooLong) {
        //
        //  Either the entry will not fit or the buckets are overloaded.
        //  Growing may rehash and may move the buffer, which invalidates
        //  pLink and pSlot, so grow and then redo the whole operation.
        //
        _GrowHashTable();
        _AddData(pbData, cbDataSize, pKey);
        return;
    }

    //
    //  Append the new entry: a HASHKEY link field (zero, since this is
    //  now the end of its chain) followed by the fixed-size value.  The
    //  entry's key is its 1-based position in the data array.
    //
    ++_cDataItems;
    *pLink = _cDataItems;
    *pKey = *pLink;
    Win4Assert(_cDataItems != 0);       // check for overflow

    *pSlot = 0;
    RtlCopyMemory((BYTE *)(pSlot + 1), pbData, _cbDataWidth);
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHash::_Rehash, protected
//
// Synopsis: Helper function for the _GrowHashTable method.
// reinserts an existing item into the hash table.
//
// Arguments: [kData] - index to the data item in the table
// [pbData] - pointer to data item
//
// Returns: Nothing
//
// Notes:
//
//--------------------------------------------------------------------------
VOID CCompressedColHash::_Rehash(
    HASHKEY kData,
    BYTE *pbData
) {
    Win4Assert(_pData != NULL && kData > 0 && kData <= _cDataItems);

    //
    //  Find the bucket for this value under the current table size.
    //
    ULONG iBucket = _pfnHash(pbData, _cbDataWidth);
    iBucket %= _cHashEntries;

    //
    //  Advance to the end of the bucket's chain, counting the entries
    //  passed.  No comparison is done; the item is simply appended.
    //
    HASHKEY* pLink = &_pHashTable[iBucket];
    USHORT cVisited = 0;

    for ( ; *pLink != 0; pLink = _IndexHashkey( *pLink ))
        cVisited++;

    if (_maxChain < cVisited)
        _maxChain = cVisited;

    //
    //  Attach the item at the tail and terminate the chain at it.
    //
    HASHKEY* pEntry = _IndexHashkey( kData );

    *pLink = kData;
    *pEntry = 0;

    // The value bytes are expected to follow the link field directly.
    Win4Assert((BYTE*)(pEntry + 1) == pbData);
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHash::GetData, public
//
// Synopsis: Retrieve a value from the hash table.
//
// Arguments: [pVarnt] - pointer to variant in which to return the data
// [PreferredType] - Preferred data type
// [ulKey] - the lookup key value
// [PropId] - (unused) property id being retrieved.
//
// Returns: pVarnt is filled with the result of the lookup.
//
// Notes: The PreferredType expresses the caller's preference only.
// This method is free to return whatever type is most
// convenient.
//
// The returned data does not conform to any alignment
// restrictions on the data.
//
//--------------------------------------------------------------------------
GetValueResult CCompressedColHash::GetData(
    PROPVARIANT * pVarnt,
    VARTYPE PreferredType,
    ULONG ulKey,
    PROPID PropId
) {
    Win4Assert(PreferredType == DataType && ulKey >= 1 && ulKey <= _cDataItems);

    //
    //  Keys are 1-based positions in the data array; anything outside
    //  1.._cDataItems yields an empty variant.
    //
    if (ulKey < 1 || ulKey > _cDataItems) {
        pVarnt->vt = VT_EMPTY;
        return GVRNotAvailable;
    }

    //
    //  The type is set before the variant is asked where it keeps its
    //  pointer, as in the original statement order.
    //
    pVarnt->vt = DataType;
    CTableVariant *pTblVar = (CTableVariant *)pVarnt;

    //
    //  Address of the stored value: skip the preceding entries, then the
    //  HASHKEY link field at the front of this entry.  The variant is
    //  pointed at the table's own copy; nothing is copied out.
    //
    BYTE *pbValue = ((BYTE *)_pDataItems +
                     (ulKey-1) * (sizeof (HASHKEY) + _cbDataWidth)) +
                    sizeof (HASHKEY);

    if (pTblVar->VariantPointerInFirstWord( )) {
        pTblVar->pszVal = (CHAR*)pbValue;
    } else {
        Win4Assert(pTblVar->VariantPointerInSecondWord( ));
        pTblVar->blob.pBlobData = pbValue;
    }

    return GVRSuccess;
}
void CCompressedColHash::FreeVariant(PROPVARIANT * pvarnt)
{
    //
    //  Intentionally empty.  GetData hands out pointers into this
    //  object's own data area, so there is nothing to release here.
    //
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHash::_GrowHashTable, protected
//
// Synopsis: Grow the space allocated to the hash table and data
// items.
//
// Arguments: - none -
//
// Returns: Nothing
//
// Notes: Also called to allocate the initial data area.
//
// The number of hash buckets starts out at a low
// number, then is increased as the amount of data
// grows. Data items must be rehashed when this occurs.
// Since items are identified by their offset in the
// data array, this must not change while rehashing.
//
//--------------------------------------------------------------------------
//
//  Number of hash buckets used by _GrowHashTable for the very first
//  allocation (when no data area exists yet).
//
const unsigned MIN_HASH_TABLE_SIZE = 11; // Minimum hash table size
//
//  Compute the bucket count to use when the hash table is rebuilt.
//
//  Arguments:  [cItems] - number of data items (see note below)
//              [cHash]  - current number of hash buckets
//
//  Returns:    The first value in the sequence n -> 2n+1, starting after
//              [cHash], that is at least the number of data items,
//              limited to MAX_HASH_TABLE_SIZE.
//
//  Notes:      The item count is read from _cDataItems, as it always has
//              been; [cItems] is not consulted.  The only caller in this
//              file passes _cDataItems for it.
//
inline USHORT CCompressedColHash::_NextHashSize(
    HASHKEY cItems,
    USHORT cHash
) {
    //
    //  Step in a ULONG and stop as soon as the cap is reached.  Stepping
    //  in a USHORT lets 2n+1 wrap around once n passes 32767, and with
    //  more than 65535 items the candidate sticks at 65535 and the loop
    //  never ends.
    //
    ULONG cNext = cHash;

    do {
        cNext = cNext*2 + 1;
    } while (cNext < _cDataItems && cNext < MAX_HASH_TABLE_SIZE);

    return (USHORT) ((cNext < MAX_HASH_TABLE_SIZE) ? cNext
                                                   : MAX_HASH_TABLE_SIZE);
}
VOID CCompressedColHash::_GrowHashTable( void )
{
    ULONG cbSize;
    //
    //  Bucket count the table will have once this call completes.  It
    //  stays at the current count unless the table is being created or
    //  rehashed below.
    //
    USHORT cNewHashEntries = _cHashEntries;
    int fRehash = FALSE;

    Win4Assert(!_fGrowthInProgress &&
               "Recursive call to CCompressedColHash::_GrowHashTable");
    _fGrowthInProgress = TRUE;

    if (_pData == NULL) {
        //
        //  Initial allocation.
        //
        cNewHashEntries = MIN_HASH_TABLE_SIZE;
    } else if (_cHashEntries < MAX_HASH_TABLE_SIZE &&
               (_cDataItems > (ULONG) _cHashEntries*2 ||
                (_cDataItems > _cHashEntries && _maxChain > 3))) {
        //
        //  The buckets are overloaded and the table can still get
        //  bigger: switch to a larger bucket count and rehash.
        //
        cNewHashEntries = _NextHashSize(_cDataItems, _cHashEntries);
        fRehash = TRUE;
        tbDebugOut((DEB_ITRACE, "Growing hash table, old,new sizes = %d,%d\n",
                    _cHashEntries, cNewHashEntries));
    }

    //
    //  Compute the required size of the hash table and data.  The hash
    //  table must be sized with the bucket count that will be in effect
    //  after this call: on a rehash the data items are moved to just
    //  past the new, larger table, and sizing with the old count could
    //  leave the buffer too small to hold them.
    //
    cbSize = cNewHashEntries * sizeof (HASHKEY);
    cbSize += (_cDataItems + 4) * (_cbDataWidth + sizeof (HASHKEY));
    cbSize = TblPageGrowSize(cbSize, TRUE);
    Win4Assert(cbSize > _cbData || (fRehash && cbSize == _cbData));

    //
    //  Get the larger buffer: resize the existing one when there is one
    //  and the size is below the segment limit, otherwise allocate a
    //  separate buffer.
    //
    BYTE *pbNewData;
    if (_pData && cbSize < TBL_PAGE_MAX_SEGMENT_SIZE) {
        pbNewData = (BYTE *)
            TblPageRealloc(_pData, _ulMemCounter, cbSize, 0);
    } else {
        pbNewData =
            (BYTE *)TblPageAlloc(cbSize, _ulMemCounter, TBL_SIG_COMPRESSED);
    }
    tbDebugOut((DEB_ITRACE, "New hash table at = %x\n", pbNewData));

    if (_pData != NULL && !fRehash) {
        //
        //  Plain growth: the layout is unchanged, so the old contents
        //  are carried over verbatim if the buffer moved.
        //
        if (_pData != pbNewData) {
            RtlCopyMemory(pbNewData, _pData, _cbData);
            TblPageDealloc(_pData, _ulMemCounter, _cbData);
            _pData = pbNewData;
        }
        _cbData = cbSize;
        _pHashTable = (HASHKEY *) _pData;
        _pDataItems = (BYTE *) (_pHashTable + _cHashEntries);
    } else {
        //
        //  Initial allocation or rehash: the bucket array changes size,
        //  so the data items start at a new offset.
        //
        BYTE *pOldDataItems = _pDataItems;
        VOID *pOldData = _pData;
        ULONG cbOldSize = _cbData;

        _pData = pbNewData;
        _cbData = cbSize;
        _pHashTable = (HASHKEY *)_pData;
        _cHashEntries = cNewHashEntries;
        _pDataItems = (BYTE *) (_pHashTable + _cHashEntries);

        //
        //  Relocate the data items before clearing the buckets.  Source
        //  and destination can overlap when the buffer was resized in
        //  place, hence RtlMoveMemory.  Item order is preserved, so
        //  every key (the item's position) stays valid.
        //
        if (pOldData != NULL)
            RtlMoveMemory(_pDataItems,
                          pOldDataItems,
                          _cDataItems * (sizeof (HASHKEY) + _cbDataWidth));
        RtlZeroMemory(_pHashTable, cNewHashEntries * sizeof (HASHKEY));
        _maxChain = 0;

        //
        //  Now re-add all old data items to the hash table.
        //
        pOldDataItems = _pDataItems;
        for (HASHKEY i=1; i<=_cDataItems; i++) {
            pOldDataItems += sizeof (HASHKEY);  // skip hash chain
            _Rehash(i, pOldDataItems);
            pOldDataItems += _cbDataWidth;      // skip data item
        }

        if (pOldData != NULL && pOldData != _pData)
            TblPageDealloc(pOldData, _ulMemCounter, cbOldSize);
    }

    _fGrowthInProgress = FALSE;
    return;
}
//+---------------------------------------------------------------------------
//
// Function: _ClearAll
//
// Synopsis: Method clears all the data in the "fixed width" part of the
// memory buffer.
//
// Arguments: (none)
//
// History: 12-16-94 srikants Created
//
// Notes:
//
//----------------------------------------------------------------------------
void CCompressedColHash::_ClearAll()
{
RtlZeroMemory(_pHashTable, _cHashEntries * sizeof (HASHKEY));
RtlZeroMemory(_pDataItems, _cDataItems * _cbDataWidth );
_cDataItems = 0;
}