352 lines
8.3 KiB
C
352 lines
8.3 KiB
C
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
// common.h
|
||
|
//
|
||
|
// Created by JOEM 02-2000
|
||
|
// Copyright (C) 2000 Microsoft Corporation
|
||
|
// All Rights Reserved
|
||
|
//
|
||
|
//////////////////////////////////////////////////////////// JOEM 02-2000 //
|
||
|
|
||
|
#ifndef _COMMON_H_
|
||
|
#define _COMMON_H_
|
||
|
|
||
|
#include <spddkhlp.h>
|
||
|
#include <spcollec.h>
|
||
|
#include <stdio.h>
|
||
|
|
||
|
#ifdef _WIN32
|
||
|
#include <wchar.h>
|
||
|
#include <windows.h>
|
||
|
#else
|
||
|
#define OutputDebugStringW puts
|
||
|
#endif
|
||
|
|
||
|
#define MAX_LINE 256
|
||
|
|
||
|
#define SkipWhiteSpace(ptr) while (*(ptr) && isspace(*(ptr))) {(ptr)++;}
|
||
|
#define SkipNonWhiteSpace(ptr) while (*(ptr) && !isspace(*(ptr))) {(ptr)++;}
|
||
|
#define StringUpperCase(ptr) { char8* str = (ptr); while (*str) {*str = (char)toupper (*str); str++;} }
|
||
|
#define StringLowerCase(ptr) { char8* str = (ptr); while (*str) {*str = (char)tolower (*str); str++;} }
|
||
|
|
||
|
#define WSkipWhiteSpace(ptr) while (*(ptr) && iswspace(*(ptr))) {(ptr)++;}
|
||
|
#define WSkipNonWhiteSpace(ptr) while (*(ptr) && !iswspace(*(ptr))) {(ptr)++;}
|
||
|
#define WStringUpperCase(ptr) { WCHAR* str = (ptr); while (*str) {*str = towupper (*str); str++;} }
|
||
|
#define WStringLowerCase(ptr) { WCHAR* str = (ptr); while (*str) {*str = towlower (*str); str++;} }
|
||
|
|
||
|
enum PUNC
|
||
|
{
|
||
|
KEEP_PUNCTUATION = 0,
|
||
|
REMOVE_PUNCTUATION = 1
|
||
|
};
|
||
|
|
||
|
//////////////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
// CDynStrArray is a helper class for a hash that contains an array
|
||
|
// of strings for each hash value
|
||
|
//
|
||
|
/////////////////////////////////////////////////////// JOEM 7-2000 //
|
||
|
class CDynStr
|
||
|
{
|
||
|
public:
|
||
|
CDynStr() {}
|
||
|
CDynStr(const WCHAR* sz) { dstr = sz; }
|
||
|
CDynStr(const CSpDynamicString& inDstr) { dstr = inDstr; }
|
||
|
~CDynStr() { dstr.Clear(); }
|
||
|
public:
|
||
|
CSpDynamicString dstr;
|
||
|
};
|
||
|
|
||
|
class CDynStrArray : public IUnknown
|
||
|
{
|
||
|
public:
|
||
|
CDynStrArray() {}
|
||
|
~CDynStrArray()
|
||
|
{
|
||
|
for ( int i=0; i<m_aDstr.GetSize(); i++ )
|
||
|
{
|
||
|
m_aDstr[i].dstr.Clear();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
STDMETHOD(QueryInterface)(const IID& iid, void** ppv) { return S_OK; }
|
||
|
STDMETHOD_(ULONG, AddRef)() { return 0; }
|
||
|
STDMETHOD_(ULONG, Release)() { delete this; return 0; }
|
||
|
|
||
|
public:
|
||
|
CSPArray<CDynStr,CDynStr> m_aDstr;
|
||
|
};
|
||
|
|
||
|
|
||
|
//////////////////////////////////////////////////////////////////////
|
||
|
// CountWords
|
||
|
//
|
||
|
/////////////////////////////////////////////////////// JOEM 7-2000 //
|
||
|
inline USHORT CountWords (const WCHAR* pszText)
|
||
|
{
|
||
|
int wordNum = 0;
|
||
|
SPDBG_ASSERT (pszText);
|
||
|
|
||
|
while (*pszText)
|
||
|
{
|
||
|
WSkipWhiteSpace(pszText);
|
||
|
if (!*pszText)
|
||
|
{
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
WSkipNonWhiteSpace(pszText);
|
||
|
wordNum++;
|
||
|
}
|
||
|
return (USHORT) wordNum;
|
||
|
}
|
||
|
|
||
|
//////////////////////////////////////////////////////////////////////
|
||
|
// SplitWords
|
||
|
//
|
||
|
// Text will be modified, wordList will be allocated with
|
||
|
// wordCount WCHAR*
|
||
|
//
|
||
|
/////////////////////////////////////////////////////// JOEM 7-2000 //
|
||
|
inline HRESULT SplitWords (WCHAR* text, WCHAR*** wordList, USHORT* wordCount)
|
||
|
{
|
||
|
SPDBG_FUNC( "SplitWords" );
|
||
|
HRESULT hr = S_OK;
|
||
|
ULONG i = 0;
|
||
|
|
||
|
SPDBG_ASSERT (text);
|
||
|
SPDBG_ASSERT (wordList);
|
||
|
SPDBG_ASSERT (wordCount);
|
||
|
|
||
|
*wordCount = CountWords (text);
|
||
|
|
||
|
*wordList = (WCHAR**) calloc (*wordCount, sizeof(**wordList));
|
||
|
if ( !*wordList )
|
||
|
{
|
||
|
hr = E_OUTOFMEMORY;
|
||
|
}
|
||
|
|
||
|
if ( SUCCEEDED(hr) )
|
||
|
{
|
||
|
for (i=0; i<*wordCount; i++)
|
||
|
{
|
||
|
WSkipWhiteSpace (text);
|
||
|
(*wordList)[i] = text;
|
||
|
WSkipNonWhiteSpace (text);
|
||
|
*text++ = L'\0';
|
||
|
}
|
||
|
}
|
||
|
|
||
|
SPDBG_REPORT_ON_FAIL( hr );
|
||
|
return hr;
|
||
|
}
|
||
|
|
||
|
//////////////////////////////////////////////////////////////////////
|
||
|
// RemovePunctuation
|
||
|
//
|
||
|
// Text will be modified
|
||
|
//
|
||
|
/////////////////////////////////////////////////////// JOEM 8-2000 //
|
||
|
inline HRESULT RemovePunctuation (WCHAR** wordList, USHORT* wordCount)
|
||
|
{
|
||
|
SPDBG_FUNC( "RemovePunctuation" );
|
||
|
HRESULT hr = S_OK;
|
||
|
WCHAR* pszWord = NULL;
|
||
|
WCHAR* psz = NULL;
|
||
|
USHORT i = 0;
|
||
|
USHORT nextItem = 0;
|
||
|
USHORT numSkipped = 0;
|
||
|
|
||
|
SPDBG_ASSERT (wordList);
|
||
|
SPDBG_ASSERT (wordCount);
|
||
|
|
||
|
for ( i=0; i<*wordCount; i++ )
|
||
|
{
|
||
|
|
||
|
// If the first char is ' or " or ` then get rid of it
|
||
|
if ( !wcscspn(wordList[i], L"\"'`") )
|
||
|
{
|
||
|
psz = wordList[i]+1;
|
||
|
wcscpy(wordList[i], psz);
|
||
|
psz = NULL;
|
||
|
}
|
||
|
|
||
|
// If the last char is ' or " or ` or one of these .,;:?! then get rid of it.
|
||
|
// psz points to the last char
|
||
|
psz = wordList[i] + wcslen(wordList[i]) - 1;
|
||
|
if ( !wcscspn(psz, L"\"'`.,;:?!") )
|
||
|
{
|
||
|
psz[0] = L'\0';
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// reposition the list items, skipping empty strings
|
||
|
for ( i=0; i<*wordCount; i++ )
|
||
|
{
|
||
|
if ( !wcslen(wordList[i]) )
|
||
|
{
|
||
|
nextItem = i+1;
|
||
|
while ( nextItem < *wordCount && !wcslen(wordList[nextItem]) )
|
||
|
{
|
||
|
nextItem++;
|
||
|
}
|
||
|
if ( nextItem < *wordCount )
|
||
|
{
|
||
|
wordList[i] = wordList[nextItem];
|
||
|
wordList[nextItem] = L"";
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
break; // out of items
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
*wordCount = i;
|
||
|
|
||
|
SPDBG_REPORT_ON_FAIL( hr );
|
||
|
return hr;
|
||
|
}
|
||
|
|
||
|
//////////////////////////////////////////////////////////////////////
|
||
|
// AssembleText
|
||
|
//
|
||
|
// ppszText will be allocated.
|
||
|
//
|
||
|
/////////////////////////////////////////////////////// JOEM 7-2000 //
|
||
|
inline HRESULT AssembleText(const int iStartWord, const int iEndWord, WCHAR** ppszWordList, WCHAR** ppszText)
|
||
|
{
|
||
|
SPDBG_FUNC( "AssembleText" );
|
||
|
HRESULT hr = S_OK;
|
||
|
int i = 0;
|
||
|
int iStrLen = 0;
|
||
|
|
||
|
for ( i=iStartWord; i<=iEndWord; i++ )
|
||
|
{
|
||
|
iStrLen += wcslen(ppszWordList[i]) + 1;
|
||
|
}
|
||
|
|
||
|
if ( iStrLen )
|
||
|
{
|
||
|
*ppszText = new WCHAR[iStrLen];
|
||
|
if ( !*ppszText )
|
||
|
{
|
||
|
hr = E_OUTOFMEMORY;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
(*ppszText)[0] = L'\0';
|
||
|
for ( i=iStartWord; i<=iEndWord; i++ )
|
||
|
{
|
||
|
wcscat(*ppszText, ppszWordList[i]);
|
||
|
if ( i < iEndWord )
|
||
|
{
|
||
|
wcscat(*ppszText, L" ");
|
||
|
}
|
||
|
}
|
||
|
WStringUpperCase(*ppszText);
|
||
|
}
|
||
|
}
|
||
|
SPDBG_REPORT_ON_FAIL( hr );
|
||
|
return hr;
|
||
|
}
|
||
|
|
||
|
//////////////////////////////////////////////////////////////////////
|
||
|
// RegularizeText
|
||
|
//
|
||
|
// Regularizes whitespace, optionally removing punctuation.
|
||
|
//
|
||
|
/////////////////////////////////////////////////////// JOEM 7-2000 //
|
||
|
inline HRESULT RegularizeText(WCHAR* pszText, PUNC removePunc)
|
||
|
{
|
||
|
SPDBG_FUNC( "RegularizeText" );
|
||
|
HRESULT hr = S_OK;
|
||
|
WCHAR** wordList = NULL;
|
||
|
WCHAR* pszNewText = NULL;
|
||
|
USHORT wordCount = 0;
|
||
|
|
||
|
if ( !pszText )
|
||
|
{
|
||
|
hr = E_INVALIDARG;
|
||
|
}
|
||
|
|
||
|
if ( SUCCEEDED(hr) )
|
||
|
{
|
||
|
hr = SplitWords (pszText, &wordList, &wordCount);
|
||
|
}
|
||
|
|
||
|
if ( SUCCEEDED(hr) && removePunc )
|
||
|
{
|
||
|
hr = RemovePunctuation(wordList, &wordCount);
|
||
|
}
|
||
|
|
||
|
if ( SUCCEEDED(hr) )
|
||
|
{
|
||
|
hr = AssembleText(0, wordCount-1, wordList, &pszNewText);
|
||
|
}
|
||
|
|
||
|
if ( SUCCEEDED(hr) && pszNewText )
|
||
|
{
|
||
|
WStringUpperCase(pszNewText);
|
||
|
wcscpy(pszText, pszNewText);
|
||
|
}
|
||
|
|
||
|
if ( pszNewText )
|
||
|
{
|
||
|
delete [] pszNewText;
|
||
|
pszNewText = NULL;
|
||
|
}
|
||
|
|
||
|
if ( wordList )
|
||
|
{
|
||
|
free(wordList);
|
||
|
}
|
||
|
|
||
|
SPDBG_REPORT_ON_FAIL( hr );
|
||
|
return hr;
|
||
|
}
|
||
|
|
||
|
//////////////////////////////////////////////////////////////////////
|
||
|
// FindUnicodeControlChar
|
||
|
//
|
||
|
/////////////////////////////////////////////////////// JOEM 7-2000 //
|
||
|
inline WCHAR* FindUnicodeControlChar (WCHAR* pszText)
|
||
|
{
|
||
|
ULONG i = 0;
|
||
|
|
||
|
while ( i<wcslen(pszText) && !iswcntrl(pszText[i]) )
|
||
|
{
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
if ( i == wcslen(pszText) )
|
||
|
{
|
||
|
return NULL;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
return &pszText[i];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//////////////////////////////////////////////////////////////////////
|
||
|
// FileExist
|
||
|
//
|
||
|
//
|
||
|
/////////////////////////////////////////////////////// JOEM 3-2000 //
|
||
|
inline bool FileExist(const WCHAR *pszName)
|
||
|
{
|
||
|
SPDBG_FUNC( "FileExist" );
|
||
|
FILE* fp;
|
||
|
|
||
|
if ( !pszName || !wcslen(pszName) || ( (fp = _wfopen(pszName, L"r")) == NULL ) )
|
||
|
{
|
||
|
return false;
|
||
|
}
|
||
|
fclose (fp);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
#endif
|