windows-nt/Source/XPSP1/NT/base/fs/hsm/wsb/wsbhash.cpp
2020-09-26 16:20:57 +08:00

207 lines
7.4 KiB
C++

/*++
© 1998 Seagate Software, Inc. All rights reserved
Module Name:
Wsbhash.cpp
Abstract:
Some functions for hashing text strings and creating DB keys from
file path names.
NOTE: Since no one needed this code by the time I got it done, it
hasn't been tested!
Author:
Ron White [ronw] 25-Apr-1997
Revision History:
--*/
#include "stdafx.h"
// Pseudorandom permutation table used by the SimpleHash function below.  It
// is taken from the article referenced in the comments for that function.
// SimpleHash indexes the table with the XOR of two byte values, so it must
// hold exactly 256 entries (indices 0..255).  The table is only ever read,
// hence it is declared const.
static const UCHAR perm_table[256] = {
      1,  87,  49,  12, 176, 178, 102, 166, 121, 193,   6,  84, 249, 230,  44, 163,
     14, 197, 213, 181, 161,  85, 218,  80,  64, 239,  24, 226, 236, 142,  38, 200,
    110, 177, 104, 103, 141, 253, 255,  50,  77, 101,  81,  18,  45,  96,  31, 222,
     25, 107, 190,  70,  86, 237, 240,  34,  72, 242,  20, 214, 244, 227, 149, 235,
     97, 234,  57,  22,  60, 250,  82, 175, 208,   5, 127, 199, 111,  62, 135, 248,
    174, 169, 211,  58,  66, 154, 106, 195, 245, 171,  17, 187, 182, 179,   0, 243,
    132,  56, 148,  75, 128, 133, 158, 100, 130, 126,  91,  13, 153, 246, 216, 219,
    119,  68, 223,  78,  83,  88, 201,  99, 122,  11,  92,  32, 136, 114,  52,  10,
    138,  30,  48, 183, 156,  35,  61,  26, 143,  74, 251,  94, 129, 162,  63, 152,
    170,   7, 115, 167, 241, 206,   3, 150,  55,  59, 151, 220,  90,  53,  23, 131,
    125, 173,  15, 238,  79,  95,  89,  16, 105, 137, 225, 224, 217, 160,  37, 123,
    118,  73,   2, 157,  46, 116,   9, 145, 134, 228, 207, 212, 202, 215,  69, 229,
     27, 188,  67, 124, 168, 252,  42,   4,  29, 108,  21, 247,  19, 205,  39, 203,
    233,  40, 186, 147, 198, 192, 155,  33, 164, 191,  98, 204, 165, 180, 117,  76,
    140,  36, 210, 172,  41,  54, 159,   8, 185, 232, 113, 196, 231,  47, 146, 120,
     51,  65,  28, 144, 254, 221,  93, 189, 194, 139, 112,  43,  71, 109, 184, 209
};
// Prototypes for the helpers that are private to this file.
static UCHAR   SimpleHash(UCHAR* pString, ULONG count);
static HRESULT ProgressiveHash(
    WCHAR* pWstring,
    ULONG  nChars,
    UCHAR* pKey,
    ULONG  keySize,
    ULONG* pKeyCount);
// ProgressiveHash - hash a wide-character string into a byte key of a given
//   maximum size.  The string is limited to 32K characters (64K bytes) and the
//   key size must be at least 16.
//
// The algorithm starts out merely XORing the two bytes of each character into a
// single byte in the key.  If it must use the last 15 bytes of the key, it begins
// using the SimpleHash function to hash progressively larger (doubling) chunks
// of the string into a single byte.
//
// This method is used to try and preserve as much information about short strings
// as possible; to preserve, to some extent, the sort order of strings; and to
// compress long strings into a reasonably sized key.  It is assumed (perhaps
// incorrectly) that many of the characters will be ANSI characters and so the
// XOR of the bytes in the initial part of the string won't lose any information.
//
// Parameters:
//   pWstring  - the characters to hash (need not be NUL-terminated; exactly
//               nChars characters are read).  Must not be NULL.
//   nChars    - number of characters to hash; at most 32768.
//   pKey      - buffer that receives the key bytes.  Must not be NULL.
//   keySize   - size of pKey in bytes; must be at least 16.
//   pKeyCount - optional; if not NULL, receives the number of key bytes that
//               were written.  Key bytes beyond that count are left untouched.
//
// Returns:
//   S_OK on success.  The argument checks use the project's WsbAffirm/WsbCatch
//   macros (defined outside this file); presumably a failed check ends up
//   returning the HRESULT named in the check: E_POINTER for a NULL pWstring or
//   pKey, E_INVALIDARG for an out-of-range nChars or keySize.
static HRESULT ProgressiveHash(WCHAR* pWstring, ULONG nChars, UCHAR* pKey,
        ULONG keySize, ULONG* pKeyCount)
{
    HRESULT hr = S_OK;

    try {
        ULONG   chunk;         // Current chunk size (in bytes)
        ULONG   headSize;      // Key bytes available to the non-progressive part
        ULONG   keyIndex = 0;  // Current index into the key
        UCHAR*  pBytes;        // Byte pointer into the string
        ULONG   remains;       // Bytes remaining in the string

        //  Check arguments
        WsbAffirm(NULL != pWstring, E_POINTER);
        WsbAffirm(NULL != pKey, E_POINTER);

        //  Range-check the character count BEFORE converting it to a byte
        //  count: nChars * 2 wraps around for nChars >= 0x80000000, which
        //  would let an oversized string slip past a check on the product.
        WsbAffirm(32768 >= nChars, E_INVALIDARG);
        WsbAffirm(15 < keySize, E_INVALIDARG);

        //  The string is processed as bytes, two per character.
        remains = nChars * 2;

        //  Do the non-progressive part: one key byte per character, formed by
        //  XORing the character's two bytes together.
        pBytes = (UCHAR*)pWstring;
        headSize = keySize - 15;
        while (remains > 0 && keyIndex < headSize) {
            pKey[keyIndex++] = (UCHAR) ( *pBytes ^ *(pBytes + 1) );
            pBytes += 2;
            remains -= 2;
        }

        //  Do the progressive part: one key byte per chunk, with the chunk
        //  size doubling each time (4, 8, 16, ... bytes).  Fifteen such chunks
        //  cover 4 * (2^15 - 1) = 131068 bytes, more than the 64K-byte maximum
        //  string, so this loop cannot write past the 15 bytes reserved for it.
        chunk = 4;
        while (remains > 0) {
            if (chunk > remains) {
                chunk = remains;
            }
            pKey[keyIndex++] = SimpleHash(pBytes, chunk);
            pBytes += chunk;
            remains -= chunk;
            chunk *= 2;
        }

        //  Report how many key bytes were produced, if the caller asked.
        if (NULL != pKeyCount) {
            *pKeyCount = keyIndex;
        }
    } WsbCatch(hr);

    return(hr);
}
// SimpleHash - reduce a run of bytes to a one-byte hash value.
//
// Both the algorithm and the permutation table are from "Fast Hashing of
// Variable-Length Text Strings", Communications of the ACM (CACM), June 1990
// (volume 33, number 6).
//
// NOTE: To get a hash wider than one byte, the article suggests hashing the
// original string with this function for the first byte, then adding 1
// (mod 256) to the first byte of the string and hashing that new string for
// the second byte, and so on.
//
// Parameters:
//   pString - the bytes to hash
//   count   - how many bytes to read from pString
//
// Returns the hash byte; the result is 0 when count is 0.
static UCHAR SimpleHash(UCHAR* pString, ULONG count)
{
    UCHAR digest = 0;

    //  Fold each input byte into the running value through the table.
    while (count > 0) {
        digest = perm_table[digest ^ *pString];
        pString++;
        count--;
    }

    return(digest);
}
// SquashFilepath - compress a file path name into a (possibly) shorter key.
//
// This function splits the key into a path part (about 3/4 of the initial
// bytes of the key) and a file name part (the rest of the key).  For each
// part it uses the ProgressiveHash function to compress the substring.
// The path is everything before the last backslash in the string and the
// file name is everything after it; if there is no backslash the path is
// empty and the whole string is treated as the file name.  Any key bytes
// not filled by the hash of a part are set to zero.
//
// This function attempts to preserve enough information in the key that keys
// will be sorted in approximately the same order as the original path names
// and it is unlikely (though not impossible) that two different paths would
// result in the same key.  Both of these are dependent on the size of the key.
// A reasonable size is probably 128 bytes, which gives 96 bytes for the path
// and 32 bytes for the file name.  A key size of 60 or less will fail because
// the file name part will be too small for the ProgressiveHash function
// (which needs at least 16 bytes).
//
// Parameters:
//   pWstring - NUL-terminated path name.  Must not be NULL.
//   pKey     - buffer that receives the key.  Must not be NULL.
//   keySize  - size of pKey in bytes; must be greater than 60.
//
// Returns:
//   S_OK on success.  The checks use the project's WsbAffirm/WsbAffirmHr/
//   WsbCatch macros (defined outside this file); presumably a failed check
//   ends up returning the HRESULT involved: E_POINTER for a NULL argument,
//   E_INVALIDARG for a key that is too small or for a path or file name
//   longer than 32K characters.
HRESULT SquashFilepath(WCHAR* pWstring, UCHAR* pKey, ULONG keySize)
{
    HRESULT hr = S_OK;

    try {
        ULONG   keyIndex;
        ULONG   nChars;
        size_t  nameLen;       // File name length in characters
        size_t  pathLen;       // Path length in characters
        WCHAR*  pFilename;
        ULONG   pathKeySize;

        //  Check arguments
        WsbAffirm(NULL != pWstring, E_POINTER);
        WsbAffirm(NULL != pKey, E_POINTER);
        WsbAffirm(60 < keySize, E_INVALIDARG);

        //  Split the string at the last backslash
        pFilename = wcsrchr(pWstring, WCHAR('\\'));
        if (NULL == pFilename) {
            pathLen = 0;
            pFilename = pWstring;
        } else {
            pathLen = (size_t)(pFilename - pWstring);
            pFilename++;
        }

        //  ProgressiveHash accepts at most 32K characters.  Enforce that here,
        //  while the length is still held in a full-width type, so that the
        //  narrowing to ULONG below can never wrap a huge length into a small
        //  one.
        WsbAffirm(pathLen <= 32768, E_INVALIDARG);
        nChars = (ULONG)pathLen;
        pathKeySize = (keySize / 4) * 3;

        //  Compress the path
        if (0 < nChars) {
            WsbAffirmHr(ProgressiveHash(pWstring, nChars, pKey, pathKeySize,
                    &keyIndex));
        } else {
            keyIndex = 0;
        }

        //  Fill the rest of the path part of the key with zeros
        for ( ; keyIndex < pathKeySize; keyIndex++) {
            pKey[keyIndex] = 0;
        }

        //  Compress the file name (same length limit and reasoning as above;
        //  wcslen returns a size_t, which can be wider than ULONG)
        nameLen = wcslen(pFilename);
        WsbAffirm(nameLen <= 32768, E_INVALIDARG);
        nChars = (ULONG)nameLen;
        if (0 < nChars) {
            //  keyIndex equals pathKeySize here, so the file name hash is
            //  written immediately after the path part.  The call overwrites
            //  keyIndex with a count relative to that position, so convert it
            //  back to an index into the whole key afterwards.
            WsbAffirmHr(ProgressiveHash(pFilename, nChars, &pKey[keyIndex],
                    keySize - pathKeySize, &keyIndex));
            keyIndex += pathKeySize;
        }

        //  Fill the rest of the file name part of the key with zeros
        for ( ; keyIndex < keySize; keyIndex++) {
            pKey[keyIndex] = 0;
        }
    } WsbCatch(hr);

    return(hr);
}