windows-nt/Source/XPSP1/NT/shell/shdocvw/hist/cachesrch.cpp
2020-09-26 16:20:57 +08:00

251 lines
7.4 KiB
C++

/**********************************************************************
Cache Search Stuff (simple strstr)
Marc Miller (t-marcmi) - 1998
**********************************************************************/
#include "cachesrch.h"
DWORD CacheSearchEngine::CacheStreamWrapper::s_dwPageSize = 0;
// Read the next page-sized block of the cache stream into _pbBuff.
//
// On the first call the page buffer is allocated, the stream offset is reset
// and the leading bytes are checked for a Unicode signature; a signature is
// skipped so it is never returned as data.  The encoding chosen on the first
// block is kept for every later block.
//
// Returns TRUE when at least one byte was read.  Returns FALSE at end of
// file, on allocation failure, or when the read fails or yields no data.
BOOL CacheSearchEngine::CacheStreamWrapper::_ReadNextBlock() {
    if (_fEndOfFile)
        return FALSE;

    // Look the page size up once and share it between all instances.
    if (!s_dwPageSize) {
        SYSTEM_INFO sysInfo;
        GetSystemInfo(&sysInfo);
        s_dwPageSize = sysInfo.dwPageSize;
    }

    BOOL fNewRead = FALSE; // is this our first look at this file?
    if (!_pbBuff) {
        // Assume ASCII until the first block proves otherwise.  This must only
        // happen before the first read: resetting it on every block would
        // discard the detected encoding after the first page.
        _dataType = ASCII_DATA;

        // Allocate a page of memory
        // Note: find out why VirtualAlloc returned error code #87 here
        //_pbBuff = (LPBYTE)(VirtualAlloc(NULL, s_dwPageSize, MEM_COMMIT, PAGE_READWRITE));
        _pbBuff = (LPBYTE)(LocalAlloc(LPTR, s_dwPageSize));
        if (!_pbBuff) {
            return FALSE;
        }
        fNewRead          = TRUE;
        _dwCacheStreamLoc = 0;
    }

    BOOL  fSuccess;
    DWORD dwSizeRead = s_dwPageSize;
    if ((fSuccess = ReadUrlCacheEntryStream(_hCacheStream, _dwCacheStreamLoc,
                                            _pbBuff, &dwSizeRead, 0)) && dwSizeRead)
    {
        // A short read means there is nothing left after this block.
        _fEndOfFile        = (dwSizeRead < s_dwPageSize);
        _dwCacheStreamLoc += dwSizeRead;
        _dwBuffSize        = dwSizeRead;
        _pbBuffPos         = _pbBuff;
        _pbBuffLast        = _pbBuff + dwSizeRead;

        if (fNewRead) {
            // determine data type from the signature at the start of the stream
            if (_dwBuffSize >= sizeof(USHORT)) {
                if (*((USHORT *)_pbBuff) == UNICODE_SIGNATURE)
                    _dataType = UNICODE_DATA;
                else if (*((USHORT *)_pbBuff) == UNICODE_SIGNATURE_BACKWARDS)
                    _dataType = UNICODE_BACKWARDS_DATA;

                // step over the signature itself
                if (s_IsUnicode(_dataType))
                    _pbBuffPos += s_Charsize(_dataType);
            }
        }
    }
    else {
        fSuccess = FALSE;
        DWORD dwError = GetLastError();
        ASSERT(dwError != ERROR_INSUFFICIENT_BUFFER);
    }
    return fSuccess;
}
// Wrap an open cache stream handle and read its first block.
//
// hCacheStream is stored and passed to ReadUrlCacheEntryStream by
// _ReadNextBlock; this class does not close it.
CacheSearchEngine::CacheStreamWrapper::CacheStreamWrapper(HANDLE hCacheStream) {
    // this class can be allocated on the stack, so every member that is read
    // later must be given a defined value here:
    _pbBuff           = NULL;
    _pbBuffPos        = NULL;
    _pbBuffLast       = NULL;
    _dwBuffSize       = 0;
    _dwCacheStreamLoc = 0;
    _dataType         = ASCII_DATA;   // GetNextChar reads this even if the first read fails
    _hCacheStream     = hCacheStream;
    _fEndOfFile       = FALSE;

    // Read in preliminary block of data.  If that fails, mark the stream as
    // exhausted so the next read attempt fails immediately.  On success the
    // end-of-file state chosen by _ReadNextBlock is left alone, so a stream
    // that fit in one block is not read a second time.
    if (!_ReadNextBlock())
        _fEndOfFile = TRUE;
}
// Release the page buffer, if one was ever allocated.
CacheSearchEngine::CacheStreamWrapper::~CacheStreamWrapper() {
    if (!_pbBuff)
        return;

    // The buffer comes from LocalAlloc (not VirtualAlloc), so LocalFree it.
    LocalFree(_pbBuff);
    _pbBuff = NULL;
}
// Fetch the next byte of the cache stream into b, pulling in the next block
// when the current one has been used up.  Returns FALSE (leaving b untouched)
// when no byte is available.
BOOL CacheSearchEngine::CacheStreamWrapper::_GetNextByte(BYTE &b)
{
    BOOL fHaveByte;

    if (_pbBuffPos == _pbBuffLast) {
        // Current block exhausted.  This also covers a failed initial read,
        // where both pointers are still NULL.
        fHaveByte = _ReadNextBlock();
    }
    else {
        // Never dereference a NULL read position.
        fHaveByte = (_pbBuffPos != NULL) ? TRUE : FALSE;
    }

    if (fHaveByte) {
        b = *_pbBuffPos;
        ++_pbBuffPos;
    }
    return fHaveByte;
}
// Decode the next character of the stream into wc.
//
// Unicode streams yield two bytes per character, swapped when the stream was
// detected as byte-reversed.  Any other stream is converted from the ANSI
// code page, consuming a second byte after a DBCS lead byte.
// Returns FALSE when the stream runs out or the conversion fails.
BOOL CacheSearchEngine::CacheStreamWrapper::GetNextChar(WCHAR &wc) {
    if (s_IsUnicode(_dataType)) {
        BYTE bFirst, bSecond;
        if (!_GetNextByte(bFirst) || !_GetNextByte(bSecond))
            return FALSE;

        LPBYTE pbOut = (LPBYTE)&wc;
        if (_dataType == UNICODE_DATA) {
            pbOut[0] = bFirst;
            pbOut[1] = bSecond;
        }
        else if (_dataType == UNICODE_BACKWARDS_DATA) {
            pbOut[0] = bSecond;
            pbOut[1] = bFirst;
        }
        else {
            ASSERT(0);
        }
        return TRUE;
    }

    BYTE rgbChar[2];
    if (!_GetNextByte(rgbChar[0]))
        return FALSE;

    int cbChar = 1;
    if (IsDBCSLeadByte(rgbChar[0])) {
        // A lead byte without its trail byte cannot be converted.
        if (!_GetNextByte(rgbChar[1]))
            return FALSE;
        cbChar = 2;
    }

    return (MultiByteToWideChar(CP_ACP, 0, (LPSTR)rgbChar, cbChar, &wc, 1) > 0);
}
// Build the normalized copy of the search target used for matching:
// leading and trailing whitespace is dropped and every interior run of
// whitespace becomes one literal space.  On allocation failure
// _pwszPreparedSearchTarget is left NULL.
void CacheSearchEngine::StreamSearcher::_PrepareSearchTarget(LPCWSTR pwszSearchTarget)
{
    UINT cchSource = lstrlenW(pwszSearchTarget);

    // The normalized string is never longer than the source string.
    _pwszPreparedSearchTarget = (LPWSTR)LocalAlloc(LPTR, (cchSource + 1) * sizeof(WCHAR));
    if (!_pwszPreparedSearchTarget)
        return;

    LPWSTR pwszOut       = _pwszPreparedSearchTarget;
    BOOL   fPendingSpace = FALSE;

    for (LPCWSTR pwszIn = s_SkipWhiteSpace(pwszSearchTarget); *pwszIn; ) {
        if (s_IsWhiteSpace(*pwszIn)) {
            // Remember the gap, but only emit it once more text follows, so
            // trailing whitespace never reaches the output.
            fPendingSpace = TRUE;
            pwszIn = s_SkipWhiteSpace(pwszIn);
            continue;
        }

        if (fPendingSpace) {
            *pwszOut++ = L' ';
            fPendingSpace = FALSE;
        }
        *pwszOut++ = *pwszIn++;
    }

    *pwszOut = L'\0';
}
// Search a character stream for the prepared search target.
//
// Characters are compared with ChrCmpIW, and a run of whitespace in the
// stream counts as a single character that matches whitespace in the target.
//
// The stream can only be read forwards, so after a mismatch the search cannot
// rewind.  Instead it falls back to the longest prefix of the target that is
// also a suffix of the text matched so far, and tries the current character
// again from there.  Simply restarting at the beginning of the target would
// miss overlapping candidates (e.g. "ab" inside "aab").  The matched text is
// equivalent to the target prefix, so the target itself serves as the history
// and no extra buffer is needed.  The fallback scan is quadratic in the target
// length in the worst case.
//
// wsrs    - stream to read characters from; consumed up to the end of the
//           first match, or to the end of the stream.
// fIsHTML - accepted for interface compatibility.  HTML tag skipping was
//           compiled out of this function, so the value is not used.
//
// Returns TRUE when the whole target was matched; FALSE otherwise, including
// when there is no prepared target or it is empty.
BOOL CacheSearchEngine::StreamSearcher::SearchCharStream(CacheSearchEngine::IWideSequentialReadStream &wsrs,
                                                         BOOL fIsHTML/* = FALSE*/)
{
    BOOL fFound = FALSE;

    if (_pwszPreparedSearchTarget && *_pwszPreparedSearchTarget)
    {
        LPCWSTR pwszTarget = _pwszPreparedSearchTarget;
        UINT    cchMatched = 0;      // number of leading target characters matched so far
        BOOL    fInWsRun   = FALSE;  // was the previous stream character whitespace?
        WCHAR   wc;

        while (pwszTarget[cchMatched] && wsrs.GetNextChar(wc)) {
            BOOL fStreamWs = s_IsWhiteSpace(wc) ? TRUE : FALSE;

            // Only the first character of a whitespace run takes part in matching.
            if (fStreamWs && fInWsRun)
                continue;
            fInWsRun = fStreamWs;

            for (;;) {
                WCHAR wcTarget = pwszTarget[cchMatched];
                BOOL  fMatch   = fStreamWs ? (s_IsWhiteSpace(wcTarget) ? TRUE : FALSE)
                                           : (ChrCmpIW(wcTarget, wc) ? FALSE : TRUE);
                if (fMatch) {
                    ++cchMatched;
                    break;
                }
                if (!cchMatched)
                    break;      // nothing matched before, so there is nothing to fall back to

                // Find the longest shorter candidate: the largest cchKeep below
                // cchMatched for which the first cchKeep target characters equal
                // the last cchKeep characters of the matched part.
                UINT cchKeep = cchMatched - 1;
                for (; cchKeep; --cchKeep) {
                    LPCWSTR pwszTail = pwszTarget + (cchMatched - cchKeep);
                    UINT    ich      = 0;
                    for (; ich < cchKeep; ++ich) {
                        BOOL fSame = s_IsWhiteSpace(pwszTarget[ich])
                                         ? (s_IsWhiteSpace(pwszTail[ich]) ? TRUE : FALSE)
                                         : (ChrCmpIW(pwszTarget[ich], pwszTail[ich]) ? FALSE : TRUE);
                        if (!fSame)
                            break;
                    }
                    if (ich == cchKeep)
                        break;
                }

                // Retry the current stream character against the shorter candidate.
                cchMatched = cchKeep;
            }
        }

        fFound = !pwszTarget[cchMatched];
    }
    return fFound;
}
// Run the given searcher over the contents of an open cache stream.
// The stream is read through a CacheStreamWrapper that lives only for the
// duration of this call.  Returns the searcher's result.
BOOL CacheSearchEngine::SearchCacheStream(CacheSearchEngine::StreamSearcher &cse, HANDLE hCacheStream,
                                          BOOL fIsHTML/* = FALSE*/)
{
    CacheStreamWrapper cswReader(hCacheStream);
    BOOL fFound = cse.SearchCharStream(cswReader, fIsHTML);
    return fFound;
}