windows-nt/Source/XPSP1/NT/net/ias/services/inc/hashtbl.h
2020-09-26 16:20:57 +08:00

685 lines
15 KiB
C++

///////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 1997, Microsoft Corp. All rights reserved.
//
// FILE
//
// hashtbl.h
//
// SYNOPSIS
//
// This file describes the hash_table template class.
//
// MODIFICATION HISTORY
//
// 09/23/1997 Original version.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _HASHTBL_H_
#define _HASHTBL_H_
#include <algorithm>
#include <functional>
#include <string>
#include <iasapi.h>
#include <nocopy.h>
//////////
// TEMPLATE STRUCT identity
//////////
// Function object that hands back a copy of whatever it is given.
// Exposes the usual argument_type / result_type typedefs through
// std::unary_function.
template <class T>
struct identity : std::unary_function<T, T>
{
    // Returns 'item' by value.
    T operator()(const T& item) const
    {
        return item;
    }
};
///////////////////////////////////////////////////////////////////////////////
//
// CLASS
//
// Caster<Type1, Type2>
//
// DESCRIPTION
//
// Function class that casts references from Type1 to Type2. Used for
// the hash table default parameters.
//
///////////////////////////////////////////////////////////////////////////////
// Function object that views a Type1 as a (const) Type2 by way of the
// ordinary implicit reference conversion.
template <class Type1, class Type2>
class Caster : public std::unary_function<Type1, const Type2&>
{
public:
    Caster() {}

    // Binds X to a const Type2 reference and returns that reference.
    const Type2& operator()(const Type1& X) const
    {
        const Type2& converted = X;
        return converted;
    }
};
///////////////////////////////////////////////////////////////////////////////
//
// CLASS
//
// ExtractFirst<T>
//
// DESCRIPTION
//
// Function class that extracts the first item from a pair. Useful for
// setting up an STL style map where the first item in the pair is the key.
//
///////////////////////////////////////////////////////////////////////////////
// Function object that returns a reference to the 'first' member of a
// pair-like object. T must provide a nested 'first_type' and a member named
// 'first'.
//
// 'typename' is required in front of T::first_type because it is a dependent
// type name; without it conforming compilers reject the declaration.
template <class T>
class ExtractFirst
    : public std::unary_function<T, const typename T::first_type&>
{
public:
    // Returns a reference to X.first (no copy is made).
    const typename T::first_type& operator()(const T& X) const
    {
        return X.first;
    }
};
//////////
// I'm putting all the hash functions inside a namespace since hash
// is such a common identifier.
//////////
namespace hash_util
{
///////////////////////////////////////////////////////////////////////////////
//
// FUNCTION
//
// hash(const std::basic_string<E>& key)
//
// DESCRIPTION
//
// Function to compute a hash value for an STL string.
//
///////////////////////////////////////////////////////////////////////////////
// Hashes the character data of an STL string by passing its raw bytes
// (length() * sizeof(E) of them) to IASHashBytes.
template <class E>
inline ULONG hash(const std::basic_string<E>& key)
{
    const size_t byteCount = key.length() * sizeof(E);
    return IASHashBytes(reinterpret_cast<CONST BYTE*>(key.data()), byteCount);
}
///////////////////////////////////////////////////////////////////////////////
//
// FUNCTION
//
// hash(ULONG key)
//
// and
//
// hash(LONG key)
//
// DESCRIPTION
//
// Functions to compute a hash value for a 32-bit integer.
// Uses Robert Jenkins' 32-bit mix function.
//
///////////////////////////////////////////////////////////////////////////////
// Scrambles an integer key with a fixed sequence of shift/add and shift/xor
// steps (Robert Jenkins' 32-bit mix, per the banner above). The order and
// shift amounts of the steps must not be changed.
inline ULONG hash(ULONG key)
{
    ULONG mixed = key;
    mixed = mixed + (mixed << 12);
    mixed = mixed ^ (mixed >> 22);
    mixed = mixed + (mixed << 4);
    mixed = mixed ^ (mixed >> 9);
    mixed = mixed + (mixed << 10);
    mixed = mixed ^ (mixed >> 2);
    mixed = mixed + (mixed << 7);
    mixed = mixed ^ (mixed >> 12);
    return mixed;
}
// Signed flavour: converts the key to ULONG and forwards to hash(ULONG).
inline ULONG hash(LONG key)
{
    const ULONG unsignedKey = static_cast<ULONG>(key);
    return hash(unsignedKey);
}
///////////////////////////////////////////////////////////////////////////////
//
// FUNCTION
//
// hash(const T* key)
//
// DESCRIPTION
//
// Function to compute a hash value for a pointer.
// Implements Knuth's multiplicative hash with a bit shift to account for
// address alignment.
//
///////////////////////////////////////////////////////////////////////////////
// Hashes a pointer by its address (the pointee is never read).
//
// The address is converted through size_t rather than unsigned long:
// unsigned long is narrower than a pointer on LLP64 targets, where the
// original cast either truncated silently or failed to compile. The low
// three bits are shifted away to account for address alignment, and the
// result is multiplied by the constant used for Knuth's multiplicative hash.
// The product is deliberately reduced to the width of ULONG.
template <class T>
inline ULONG hash(const T* key)
{
    const size_t address = reinterpret_cast<size_t>(key);
    return static_cast<ULONG>(2654435761UL * (address >> 3));
}
//////////
// Explicit specializations of the above for C-style strings: these hash the
// characters of the string rather than the address of the pointer.
//////////
// Explicit specialization of hash(const T*) for narrow C strings: hashes the
// characters up to (not including) the terminating NUL. A null pointer is
// hashed as a zero-length buffer.
//
// The 'template <>' prefix is required for an explicit specialization;
// omitting it is only accepted by older, non-conforming compilers.
template <>
inline ULONG hash<char>(const char* key)
{
    const size_t byteCount = key ? strlen(key) : 0;
    return IASHashBytes(reinterpret_cast<CONST BYTE*>(key), byteCount);
}
// Explicit specialization of hash(const T*) for wide C strings: hashes the
// bytes of the characters up to (not including) the terminating NUL. A null
// pointer is hashed as a zero-length buffer.
//
// The 'template <>' prefix is required for an explicit specialization;
// omitting it is only accepted by older, non-conforming compilers.
template <>
inline ULONG hash<wchar_t>(const wchar_t* key)
{
    const size_t byteCount = key ? wcslen(key) * sizeof(wchar_t) : 0;
    return IASHashBytes(reinterpret_cast<CONST BYTE*>(key), byteCount);
}
///////////////////////////////////////////////////////////////////////////////
//
// CLASS
//
// Hasher
//
// DESCRIPTION
//
// Function class that uses the 'default' hash functions defined above.
//
///////////////////////////////////////////////////////////////////////////////
// Function object that hashes its argument by calling whichever free
// function named 'hash' overload resolution selects for that argument.
template <class T>
struct Hasher
    : public std::unary_function<T, ULONG>
{
    ULONG operator()(const T& item) const
    {
        const ULONG code = hash(item);
        return code;
    }
};
///////////////////////////////////////////////////////////////////////////////
//
// CLASS
//
// ObjectHasher
//
// DESCRIPTION
//
// Function class that invokes a bound 'hash' method.
//
///////////////////////////////////////////////////////////////////////////////
// Function object that hashes its argument by calling the argument's own
// const-callable hash() member function.
template <class T>
struct ObjectHasher
    : public std::unary_function<T, ULONG>
{
    ULONG operator()(const T& item) const
    {
        const ULONG code = item.hash();
        return code;
    }
};
} // hash_util
///////////////////////////////////////////////////////////////////////////////
//
// CLASS
//
// hash_table<Key, Hasher, Value, Extractor, KeyMatch>
//
// DESCRIPTION
//
// Implements a general-purpose hash table. This can implement a map, a
// set, or a hybrid depending on how Key, Value, and Extractor are
// specified. Note that the default parameters for Value and Extractor
// implement a set.
//
// NOTES
//
// Although I used similar nomenclature, this is not an STL collection.
// In particular, the iterator does not conform to the STL guidelines.
//
// This class is not thread safe.
//
///////////////////////////////////////////////////////////////////////////////
template <
class Key,
class Hasher = hash_util::ObjectHasher<Key>,
class Value = Key,
class Extractor = Caster<Value, Key>,
class KeyMatch = std::equal_to<Key>
>
class hash_table : NonCopyable
{
public:
typedef hash_table<Key, Hasher, Value, Extractor, KeyMatch> table_type;
typedef Key key_type;
typedef Value value_type;
protected:
//////////
// Singly-linked list node.
//////////
struct Node
{
Node* next; // Next node in the list (is NULL for last item).
value_type value; // Value stored in this node.
Node(const value_type& _V) : value(_V) {}
// Erase the node immediately following this.
void erase_next()
{
Node* node = next;
next = next->next;
delete node;
}
};
//////////
//
// Singly-linked list. This is not intended to be a general-purpose class;
// it is only intended to serve as a bucket in a hash table.
//
// Note: I have intentionally NOT deleted the list nodes in the destructor.
// This is to support the hash_table grow() method.
//
//////////
struct SList
{
Node* head; // The first node in the list (if any).
SList() : head(NULL) {}
// Delete all nodes in the list.
void clear()
{
while (head) pop_front();
}
// Remove a node from the front of the list.
void pop_front()
{
((Node*)&head)->erase_next();
}
// Add a node to the front of the list.
void push_front(Node* node)
{
node->next = head;
head = node;
}
};
public:
//////////
//
// Hash table iterator.
//
// Note: This iterator is NOT safe. If the hash table is resized, the
// iterator will no longer be valid.
//
//////////
class const_iterator
{
public:
const_iterator(SList* _first, SList* _end)
: node(_first->head), bucket(_first), end(_end)
{
find_node();
}
const value_type& operator*() const
{
return node->value;
}
const value_type* operator->() const
{
return &**this;
}
void operator++()
{
node = node->next;
find_node();
}
bool more() const
{
return bucket != end;
}
protected:
friend table_type;
Node* MyNode() const
{
return node;
}
// Advance until we're on a node or we've reached the end.
void find_node()
{
while (!node && ++bucket != end)
{
node = bucket->head;
}
}
Node* node; // The node under the iterator.
SList* bucket; // The current bucket.
SList* end; // The end of the bucket array.
};
typedef const_iterator iterator;
//////////
// Constructor.
//////////
hash_table(size_t size = 16,
const Hasher& h = Hasher(),
const Extractor& e = Extractor(),
const KeyMatch& k = KeyMatch())
: buckets(1),
entries(0),
hasher(h),
extractor(e),
key_match(k)
{
// Set buckets to smallest power of 2 greater than or equal to size.
while (buckets < size) buckets <<= 1;
table = new SList[buckets];
// Calculate the hash mask.
mask = buckets - 1;
}
//////////
// Destructor.
//////////
~hash_table()
{
clear();
delete[] table;
}
//////////
// Return an iterator positioned at the start of the hash table.
//////////
const_iterator begin() const
{
return const_iterator(table, table + buckets);
}
//////////
// Clear all entries from the hash table.
//////////
void clear()
{
if (!empty())
{
for (size_t i=0; i<buckets; i++)
{
table[i].clear();
}
entries = 0;
}
}
bool empty() const
{
return entries == 0;
}
//////////
// Erase all entries matching the given key. Returns the number of entries
// erased.
//////////
size_t erase(const key_type& key)
{
size_t erased = 0;
Node* node = (Node*)&(get_bucket(key).head);
while (node->next)
{
if (key_match(extractor(node->next->value), key))
{
node->erase_next();
++erased;
}
else
{
node = node->next;
}
}
entries -= erased;
return erased;
}
//////////
// Erases the entry under the current iterator.
//////////
void erase(iterator& it)
{
// Only look in the bucket indicated by the iterator.
Node* node = (Node*)&(it.bucket->head);
while (node->next)
{
// Look for a pointer match -- not a key match.
if (node->next == it.node)
{
// Advance the iterator to a valid node ...
++it;
// ... then delete the current one.
node->erase_next();
break;
}
node = node->next;
}
}
//////////
// Search the hash table for the first entry matching key.
//////////
const value_type* find(const key_type& key) const
{
return search_bucket(get_bucket(key), key);
}
//////////
// Insert a new entry into the hash table ONLY if the key is unique. Returns
// true if successful, false otherwise.
//////////
bool insert(const value_type& value)
{
reserve_space();
SList& b = get_bucket(extractor(value));
if (search_bucket(b, extractor(value))) return false;
b.push_front(new Node(value));
add_entry();
return true;
}
//////////
// Insert a new entry into the hash table without checking uniqueness.
//////////
void multi_insert(const value_type& value)
{
reserve_space();
get_bucket(extractor(value)).push_front(new Node(value));
add_entry();
}
//////////
// Inserts the entry if the key is unique. Otherwise, overwrites the first
// entry found with a matching key. Returns true if an entry was
// overwritten, false otherwise.
//////////
bool overwrite(const value_type& value)
{
reserve_space();
SList& b = get_bucket(extractor(value));
const value_type* existing = search_bucket(b, extractor(value));
if (existing)
{
// We can get away with modifying the value in place, because we
// know the hash value must be the same. I destroy the old value
// and construct a new one inplace, so that Value doesn't need an
// assignment operator.
existing->~value_type();
new ((void*)existing) value_type(value);
return true;
}
b.push_front(new Node(value));
add_entry();
return false;
}
//////////
// Return the number of entries in the hash table.
//////////
size_t size() const
{
return entries;
}
protected:
//////////
// Increment the entries count.
//////////
void add_entry()
{
++entries;
}
//////////
// Grow the hash table as needed. We have to separate reserve_space and
// add_entry to make the collection exception safe (since there will be
// an intervening new).
//////////
void reserve_space()
{
if (entries >= buckets) grow();
}
//////////
// Return the bucket for a given key.
//////////
SList& get_bucket(const key_type& key) const
{
return table[hasher(key) & mask];
}
//////////
// Increase the capacity of the hash table.
//////////
void grow()
{
// We must allocate the memory first to be exception-safe.
SList* newtbl = new SList[buckets << 1];
// Initialize an iterator for the old table ...
const_iterator i = begin();
// ... then swap in the new table.
std::swap(table, newtbl);
buckets <<= 1;
mask = buckets - 1;
// Iterate through the old and insert the entries into the new.
while (i.more())
{
Node* node = i.MyNode();
// Increment the iterator ...
++i;
// ... before we clobber the node's next pointer.
get_bucket(extractor(node->value)).push_front(node);
}
// Delete the old table.
delete[] newtbl;
}
//////////
// Search a bucket for a specified key.
//////////
const value_type* search_bucket(SList& bucket, const key_type& key) const
{
Node* node = bucket.head;
while (node)
{
if (key_match(extractor(node->value), key))
{
return &node->value;
}
node = node->next;
}
return NULL;
}
size_t buckets; // The number of buckets in the hash table.
size_t mask; // Bit mask used for reducing hash values.
size_t entries; // The number of entries in the hash table.
SList* table; // An array of buckets.
Hasher hasher; // Used to hash keys.
Extractor extractor; // Used to convert values to keys.
KeyMatch key_match; // Used to test keys for equality.
};
#endif // _HASHTBL_H_