251 lines
7.4 KiB
C++
251 lines
7.4 KiB
C++
/**********************************************************************
|
|
Cache Search Stuff (simple strstr)
|
|
|
|
Marc Miller (t-marcmi) - 1998
|
|
**********************************************************************/
|
|
#include "cachesrch.h"
|
|
|
|
DWORD CacheSearchEngine::CacheStreamWrapper::s_dwPageSize = 0;
|
|
|
|
BOOL CacheSearchEngine::CacheStreamWrapper::_ReadNextBlock() {
|
|
if (_fEndOfFile)
|
|
return FALSE;
|
|
|
|
if (!s_dwPageSize) {
|
|
SYSTEM_INFO sysInfo;
|
|
GetSystemInfo(&sysInfo);
|
|
s_dwPageSize = sysInfo.dwPageSize;
|
|
}
|
|
BOOL fNewRead = FALSE; // is this our first look at this file?
|
|
if (!_pbBuff) {
|
|
// Allocate a page of memory
|
|
|
|
// Note: find out why this returned error code #87
|
|
//_pbBuff = (LPBYTE)(VirtualAlloc(NULL, s_dwPageSize, MEM_COMMIT, PAGE_READWRITE));
|
|
_pbBuff = (LPBYTE)(LocalAlloc(LPTR, s_dwPageSize));
|
|
if (!_pbBuff) {
|
|
//DWORD dwError = GetLastError();
|
|
return FALSE;
|
|
}
|
|
fNewRead = TRUE;
|
|
_dwCacheStreamLoc = 0;
|
|
}
|
|
|
|
BOOL fSuccess;
|
|
DWORD dwSizeRead = s_dwPageSize;
|
|
if ((fSuccess = ReadUrlCacheEntryStream(_hCacheStream, _dwCacheStreamLoc,
|
|
_pbBuff, &dwSizeRead, 0)) && dwSizeRead)
|
|
{
|
|
_fEndOfFile = (dwSizeRead < s_dwPageSize);
|
|
|
|
_dwCacheStreamLoc += dwSizeRead;
|
|
_dwBuffSize = dwSizeRead;
|
|
_pbBuffPos = _pbBuff;
|
|
_pbBuffLast = _pbBuff + dwSizeRead;
|
|
|
|
_dataType = ASCII_DATA; // default
|
|
if (fNewRead) {
|
|
// deterine data type
|
|
if (_dwBuffSize >= sizeof(USHORT)) {
|
|
if (*((USHORT *)_pbBuff) == UNICODE_SIGNATURE)
|
|
_dataType = UNICODE_DATA;
|
|
else if (*((USHORT *)_pbBuff) == UNICODE_SIGNATURE_BACKWARDS)
|
|
_dataType = UNICODE_BACKWARDS_DATA;
|
|
|
|
if (s_IsUnicode(_dataType))
|
|
_pbBuffPos += s_Charsize(_dataType);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
fSuccess = FALSE;
|
|
DWORD dwError = GetLastError();
|
|
ASSERT(dwError != ERROR_INSUFFICIENT_BUFFER);
|
|
}
|
|
return fSuccess;
|
|
}
|
|
|
|
CacheSearchEngine::CacheStreamWrapper::CacheStreamWrapper(HANDLE hCacheStream) {
|
|
// this class can be allocated on the stack:
|
|
_pbBuff = NULL;
|
|
_pbBuffPos = NULL;
|
|
_pbBuffLast = NULL;
|
|
_dwBuffSize = 0;
|
|
_hCacheStream = hCacheStream;
|
|
_fEndOfFile = FALSE;
|
|
|
|
// Read in preliminary block of data --
|
|
// Die on next read to handle failure
|
|
_fEndOfFile = !(_ReadNextBlock());
|
|
}
|
|
|
|
CacheSearchEngine::CacheStreamWrapper::~CacheStreamWrapper() {
|
|
if (_pbBuff) {
|
|
//VirtualFree(_pbBuff);
|
|
LocalFree(_pbBuff);;
|
|
_pbBuff = NULL;
|
|
}
|
|
}
|
|
|
|
// Read next byte from cache stream, reading in next block if necessary
|
|
BOOL CacheSearchEngine::CacheStreamWrapper::_GetNextByte(BYTE &b)
|
|
{
|
|
//
|
|
// If the initial read fails _pbBuffPos will be NULL. Don't
|
|
// allow it to be dereffed.
|
|
//
|
|
BOOL fSuccess = _pbBuffPos ? TRUE : FALSE;
|
|
|
|
if (_pbBuffPos == _pbBuffLast)
|
|
fSuccess = _ReadNextBlock();
|
|
|
|
if (fSuccess)
|
|
b = *(_pbBuffPos++);
|
|
|
|
return fSuccess;
|
|
}
|
|
|
|
BOOL CacheSearchEngine::CacheStreamWrapper::GetNextChar(WCHAR &wc) {
|
|
BOOL fSuccess = TRUE;
|
|
if (s_IsUnicode(_dataType)) {
|
|
BYTE b1, b2;
|
|
LPBYTE bs = (LPBYTE)&wc;
|
|
if (_GetNextByte(b1) && _GetNextByte(b2)) {
|
|
switch (_dataType) {
|
|
case UNICODE_DATA:
|
|
bs[0] = b1;
|
|
bs[1] = b2;
|
|
break;
|
|
case UNICODE_BACKWARDS_DATA:
|
|
bs[0] = b2;
|
|
bs[1] = b1;
|
|
break;
|
|
default: ASSERT(0);
|
|
}
|
|
}
|
|
else
|
|
fSuccess = FALSE;
|
|
}
|
|
else
|
|
{
|
|
|
|
BYTE szData[2];
|
|
|
|
if (_GetNextByte(szData[0]))
|
|
{
|
|
int cch = 1;
|
|
if (IsDBCSLeadByte(szData[0]))
|
|
{
|
|
if (!_GetNextByte(szData[1]))
|
|
{
|
|
fSuccess = FALSE;
|
|
}
|
|
cch++;
|
|
}
|
|
|
|
if (fSuccess)
|
|
{
|
|
fSuccess = (MultiByteToWideChar(CP_ACP, 0, (LPSTR)szData, cch, &wc, 1) > 0);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fSuccess = FALSE;
|
|
}
|
|
|
|
}
|
|
return fSuccess;
|
|
}
|
|
|
|
|
|
// Prepare a search target string for searching --
|
|
void CacheSearchEngine::StreamSearcher::_PrepareSearchTarget(LPCWSTR pwszSearchTarget)
|
|
{
|
|
UINT uStrLen = lstrlenW(pwszSearchTarget);
|
|
_pwszPreparedSearchTarget = ((LPWSTR)LocalAlloc(LPTR, (uStrLen + 1) * sizeof(WCHAR)));
|
|
|
|
if (_pwszPreparedSearchTarget) {
|
|
// Strip leading and trailing whitespace and compress adjacent whitespace characters
|
|
// into literal spaces
|
|
LPWSTR pwszTemp = _pwszPreparedSearchTarget;
|
|
pwszSearchTarget = s_SkipWhiteSpace(pwszSearchTarget);
|
|
BOOL fAddWs = FALSE;
|
|
while(*pwszSearchTarget) {
|
|
if (s_IsWhiteSpace(*pwszSearchTarget)) {
|
|
fAddWs = TRUE;
|
|
pwszSearchTarget = s_SkipWhiteSpace(pwszSearchTarget);
|
|
}
|
|
else {
|
|
if (fAddWs) {
|
|
*(pwszTemp++) = L' ';
|
|
fAddWs = FALSE;
|
|
}
|
|
*(pwszTemp++) = *(pwszSearchTarget++);
|
|
}
|
|
}
|
|
*pwszTemp = L'\0';
|
|
}
|
|
}
|
|
|
|
// Search a character stream for a searchtarget
|
|
// Does a simple strstr, but tries to be smart about whitespace and
|
|
// ignores HTML where possible...
|
|
BOOL CacheSearchEngine::StreamSearcher::SearchCharStream(CacheSearchEngine::IWideSequentialReadStream &wsrs,
|
|
BOOL fIsHTML/* = FALSE*/)
|
|
{
|
|
BOOL fFound = FALSE;
|
|
|
|
if (_pwszPreparedSearchTarget && *_pwszPreparedSearchTarget)
|
|
{
|
|
WCHAR wc;
|
|
LPCWSTR pwszCurrent = _pwszPreparedSearchTarget;
|
|
BOOL fMatchedWS = FALSE;
|
|
#if 0
|
|
BOOL fIgnoreHTMLTag = FALSE;
|
|
#endif
|
|
|
|
while(*pwszCurrent && wsrs.GetNextChar(wc)) {
|
|
#if 0
|
|
if (fIsHTML && (wc == L'<'))
|
|
fIgnoreHTMLTag = TRUE;
|
|
else if (fIgnoreHTMLTag) {
|
|
if (wc == L'>')
|
|
fIgnoreHTMLTag = FALSE;
|
|
}
|
|
else
|
|
#endif
|
|
|
|
if (s_IsWhiteSpace(wc)) {
|
|
// matched whitespace in search stream, look for
|
|
// matching whitespace in target string
|
|
if (!fMatchedWS) {
|
|
if (s_IsWhiteSpace(*pwszCurrent)) {
|
|
fMatchedWS = TRUE;
|
|
++pwszCurrent;
|
|
}
|
|
else
|
|
pwszCurrent = _pwszPreparedSearchTarget;
|
|
}
|
|
}
|
|
else {
|
|
fMatchedWS = FALSE;
|
|
if (!ChrCmpIW(*pwszCurrent, wc)) {
|
|
++pwszCurrent;
|
|
}
|
|
else {
|
|
pwszCurrent = _pwszPreparedSearchTarget;
|
|
}
|
|
}
|
|
}
|
|
fFound = !*pwszCurrent;
|
|
}
|
|
return fFound;
|
|
}
|
|
|
|
BOOL CacheSearchEngine::SearchCacheStream(CacheSearchEngine::StreamSearcher &cse, HANDLE hCacheStream,
|
|
BOOL fIsHTML/* = FALSE*/)
|
|
{
|
|
CacheStreamWrapper csw(hCacheStream);
|
|
return cse.SearchCharStream(csw, fIsHTML);
|
|
}
|