windows-nt/Source/XPSP1/NT/enduser/speech/tts/prompts/engine/common.h

/////////////////////////////////////////////////////////////////////////////
// 
// common.h
//
// Created by JOEM  02-2000
// Copyright (C) 2000 Microsoft Corporation
// All Rights Reserved
//
//////////////////////////////////////////////////////////// JOEM  02-2000 //

#ifndef _COMMON_H_
#define _COMMON_H_

#include <spddkhlp.h>
#include <spcollec.h>
#include <stdio.h>

#ifdef _WIN32
#include <wchar.h>
#include <windows.h>
#else 
#define OutputDebugStringW puts
#endif

#define MAX_LINE 256

#define SkipWhiteSpace(ptr)    while (*(ptr) && isspace(*(ptr))) {(ptr)++;}
#define SkipNonWhiteSpace(ptr) while (*(ptr) && !isspace(*(ptr))) {(ptr)++;}
#define StringUpperCase(ptr)  { char8* str = (ptr); while (*str) {*str = (char)toupper (*str); str++;} }
#define StringLowerCase(ptr)  { char8* str = (ptr); while (*str) {*str = (char)tolower (*str); str++;} }

#define WSkipWhiteSpace(ptr)    while (*(ptr) && iswspace(*(ptr))) {(ptr)++;}
#define WSkipNonWhiteSpace(ptr) while (*(ptr) && !iswspace(*(ptr))) {(ptr)++;}
#define WStringUpperCase(ptr)  { WCHAR* str = (ptr); while (*str) {*str = towupper (*str); str++;} }
#define WStringLowerCase(ptr)  { WCHAR* str = (ptr); while (*str) {*str = towlower (*str); str++;} }

enum PUNC
{
    KEEP_PUNCTUATION = 0,
    REMOVE_PUNCTUATION = 1
};

//////////////////////////////////////////////////////////////////////
//
// CDynStrArray is a helper class for a hash that contains an array
// of strings for each hash value
//
/////////////////////////////////////////////////////// JOEM 7-2000 //
class CDynStr
{
public:
    CDynStr() {}
    CDynStr(const WCHAR* sz) { dstr = sz; }
    CDynStr(const CSpDynamicString& inDstr) { dstr = inDstr; }
    ~CDynStr() { dstr.Clear(); }
public:
    CSpDynamicString dstr;
};

class CDynStrArray : public IUnknown
{
public:
    CDynStrArray() {}
    ~CDynStrArray() 
    { 
        for ( int i=0; i<m_aDstr.GetSize(); i++ ) 
        {
            m_aDstr[i].dstr.Clear();
        } 
    }

    STDMETHOD(QueryInterface)(const IID& iid, void** ppv) { return S_OK; }
    STDMETHOD_(ULONG, AddRef)() { return 0; }
    STDMETHOD_(ULONG, Release)() { delete this; return 0; }

public:
    CSPArray<CDynStr,CDynStr> m_aDstr;
};


//////////////////////////////////////////////////////////////////////
// CountWords
//
/////////////////////////////////////////////////////// JOEM 7-2000 //
inline USHORT CountWords (const WCHAR* pszText)
{
    int wordNum = 0;
    SPDBG_ASSERT (pszText);
    
    while (*pszText) 
    {
        WSkipWhiteSpace(pszText);
        if (!*pszText) 
        {
            break;
        }
        
        WSkipNonWhiteSpace(pszText);
        wordNum++;
    }
    return (USHORT) wordNum;
}

//////////////////////////////////////////////////////////////////////
// SplitWords
//
// Text will be modified, wordList will be allocated with
// wordCount WCHAR*
//
/////////////////////////////////////////////////////// JOEM 7-2000 //
inline HRESULT SplitWords (WCHAR* text, WCHAR*** wordList, USHORT* wordCount)
{
    SPDBG_FUNC( "SplitWords" );
    HRESULT hr  = S_OK;
    ULONG   i   = 0;
    
    SPDBG_ASSERT (text);
    SPDBG_ASSERT (wordList);
    SPDBG_ASSERT (wordCount);
    
    *wordCount = CountWords (text); 
    
    *wordList = (WCHAR**) calloc (*wordCount, sizeof(**wordList));
    if ( !*wordList )
    {
        hr = E_OUTOFMEMORY;
    }
    
    if ( SUCCEEDED(hr) )
    {
        for (i=0; i<*wordCount; i++) 
        {
            WSkipWhiteSpace (text);
            (*wordList)[i] = text;
            WSkipNonWhiteSpace (text);
            *text++ = L'\0';
        }
    }
    
    SPDBG_REPORT_ON_FAIL( hr );
    return hr;
}

//////////////////////////////////////////////////////////////////////
// RemovePunctuation
//
// Text will be modified
//
/////////////////////////////////////////////////////// JOEM 8-2000 //
inline HRESULT RemovePunctuation (WCHAR** wordList, USHORT* wordCount)
{
    SPDBG_FUNC( "RemovePunctuation" );
    HRESULT hr          = S_OK;
    WCHAR*  pszWord     = NULL;
    WCHAR*  psz         = NULL;
    USHORT  i           = 0;
    USHORT  nextItem    = 0;
    USHORT  numSkipped  = 0;
    
    SPDBG_ASSERT (wordList);
    SPDBG_ASSERT (wordCount);
    
    for ( i=0; i<*wordCount; i++ )
    {

        // If the first char is ' or " or ` then get rid of it
        if ( !wcscspn(wordList[i], L"\"'`") )
        {
            psz = wordList[i]+1;
            wcscpy(wordList[i], psz);
            psz = NULL;
        }

        // If the last char is ' or " or ` or one of these .,;:?! then get rid of it.
        // psz points to the last char
        psz = wordList[i] + wcslen(wordList[i]) - 1;
        if ( !wcscspn(psz, L"\"'`.,;:?!") )
        {
            psz[0] = L'\0';
        }
    }

    // reposition the list items, skipping empty strings
    for ( i=0; i<*wordCount; i++ )
    {
        if ( !wcslen(wordList[i]) ) 
        {
            nextItem = i+1;
            while ( nextItem < *wordCount && !wcslen(wordList[nextItem]) )
            {
                nextItem++;
            }
            if ( nextItem < *wordCount )
            {
                wordList[i] = wordList[nextItem];
                wordList[nextItem] = L"";
            }
            else
            {
                break; // out of items
            }
        }
    }

    *wordCount = i;

    SPDBG_REPORT_ON_FAIL( hr );
    return hr;
}

//////////////////////////////////////////////////////////////////////
// AssembleText
//
// ppszText will be allocated.
//
/////////////////////////////////////////////////////// JOEM 7-2000 //
inline HRESULT AssembleText(const int iStartWord, const int iEndWord, WCHAR** ppszWordList, WCHAR** ppszText)
{
    SPDBG_FUNC( "AssembleText" );
    HRESULT hr      = S_OK;
    int     i       = 0;
    int     iStrLen = 0;

    for ( i=iStartWord; i<=iEndWord; i++ )
    {
        iStrLen += wcslen(ppszWordList[i]) + 1;
    }
    
    if ( iStrLen )
    {
        *ppszText = new WCHAR[iStrLen];
        if ( !*ppszText )
        {
            hr = E_OUTOFMEMORY;
        }
        else
        {
            (*ppszText)[0] = L'\0';
            for ( i=iStartWord; i<=iEndWord; i++ )
            {
                wcscat(*ppszText, ppszWordList[i]);
                if ( i < iEndWord ) 
                {
                    wcscat(*ppszText, L" ");
                }
            }
            WStringUpperCase(*ppszText);
        }
    }
    SPDBG_REPORT_ON_FAIL( hr );
    return hr;
}

//////////////////////////////////////////////////////////////////////
// RegularizeText
//
// Regularizes whitespace, optionally removing punctuation.
//
/////////////////////////////////////////////////////// JOEM 7-2000 //
inline HRESULT RegularizeText(WCHAR* pszText, PUNC removePunc)
{
    SPDBG_FUNC( "RegularizeText" );
    HRESULT hr          = S_OK;
    WCHAR** wordList    = NULL;
    WCHAR*  pszNewText  = NULL;
    USHORT  wordCount   = 0;

    if ( !pszText )
    {
        hr = E_INVALIDARG;
    }

    if ( SUCCEEDED(hr) )
    {
        hr = SplitWords (pszText, &wordList, &wordCount);
    }

    if ( SUCCEEDED(hr) && removePunc )
    {
        hr = RemovePunctuation(wordList, &wordCount);
    }

    if ( SUCCEEDED(hr) )
    {
        hr = AssembleText(0, wordCount-1, wordList, &pszNewText);
    }

    if ( SUCCEEDED(hr) && pszNewText )
    {
        WStringUpperCase(pszNewText);
        wcscpy(pszText, pszNewText);
    }

    if ( pszNewText )
    {
        delete [] pszNewText;
        pszNewText = NULL;
    }

    if ( wordList )
    {
        free(wordList);
    }

    SPDBG_REPORT_ON_FAIL( hr );
    return hr;
}

//////////////////////////////////////////////////////////////////////
// FindUnicodeControlChar
//
/////////////////////////////////////////////////////// JOEM 7-2000 //
inline WCHAR* FindUnicodeControlChar (WCHAR* pszText)
{
    ULONG i = 0;

    while ( i<wcslen(pszText) && !iswcntrl(pszText[i]) )
    {
        i++;
    }

    if ( i == wcslen(pszText) )
    {
        return NULL;
    }
    else
    {
        return &pszText[i];
    }
}

//////////////////////////////////////////////////////////////////////
// FileExist
//
//
/////////////////////////////////////////////////////// JOEM 3-2000 //
inline bool FileExist(const WCHAR *pszName)
{
    SPDBG_FUNC( "FileExist" );
    FILE* fp;
    
    if ( !pszName || !wcslen(pszName) || ( (fp = _wfopen(pszName, L"r")) == NULL ) ) 
    {
        return false;
    }
    fclose (fp);
    return true;
}


#endif
Add source files 2020-09-26 03:20:57 -05:00			`/////////////////////////////////////////////////////////////////////////////`
			`//`
			`// common.h`
			`//`
			`// Created by JOEM 02-2000`
			`// Copyright (C) 2000 Microsoft Corporation`
			`// All Rights Reserved`
			`//`
			`//////////////////////////////////////////////////////////// JOEM 02-2000 //`

			`#ifndef _COMMON_H_`
			`#define _COMMON_H_`

			`#include <spddkhlp.h>`
			`#include <spcollec.h>`
			`#include <stdio.h>`

			`#ifdef _WIN32`
			`#include <wchar.h>`
			`#include <windows.h>`
			`#else`
			`#define OutputDebugStringW puts`
			`#endif`

			`#define MAX_LINE 256`

			`#define SkipWhiteSpace(ptr) while ((ptr) && isspace((ptr))) {(ptr)++;}`
			`#define SkipNonWhiteSpace(ptr) while ((ptr) && !isspace((ptr))) {(ptr)++;}`
			`#define StringUpperCase(ptr) { char8* str = (ptr); while (str) {str = (char)toupper (*str); str++;} }`
			`#define StringLowerCase(ptr) { char8* str = (ptr); while (str) {str = (char)tolower (*str); str++;} }`

			`#define WSkipWhiteSpace(ptr) while ((ptr) && iswspace((ptr))) {(ptr)++;}`
			`#define WSkipNonWhiteSpace(ptr) while ((ptr) && !iswspace((ptr))) {(ptr)++;}`
			`#define WStringUpperCase(ptr) { WCHAR* str = (ptr); while (str) {str = towupper (*str); str++;} }`
			`#define WStringLowerCase(ptr) { WCHAR* str = (ptr); while (str) {str = towlower (*str); str++;} }`

			`enum PUNC`
			`{`
			`KEEP_PUNCTUATION = 0,`
			`REMOVE_PUNCTUATION = 1`
			`};`

			`//////////////////////////////////////////////////////////////////////`
			`//`
			`// CDynStrArray is a helper class for a hash that contains an array`
			`// of strings for each hash value`
			`//`
			`/////////////////////////////////////////////////////// JOEM 7-2000 //`
			`class CDynStr`
			`{`
			`public:`
			`CDynStr() {}`
			`CDynStr(const WCHAR* sz) { dstr = sz; }`
			`CDynStr(const CSpDynamicString& inDstr) { dstr = inDstr; }`
			`~CDynStr() { dstr.Clear(); }`
			`public:`
			`CSpDynamicString dstr;`
			`};`

			`class CDynStrArray : public IUnknown`
			`{`
			`public:`
			`CDynStrArray() {}`
			`~CDynStrArray()`
			`{`
			`for ( int i=0; i<m_aDstr.GetSize(); i++ )`
			`{`
			`m_aDstr[i].dstr.Clear();`
			`}`
			`}`

			`STDMETHOD(QueryInterface)(const IID& iid, void** ppv) { return S_OK; }`
			`STDMETHOD_(ULONG, AddRef)() { return 0; }`
			`STDMETHOD_(ULONG, Release)() { delete this; return 0; }`

			`public:`
			`CSPArray<CDynStr,CDynStr> m_aDstr;`
			`};`


			`//////////////////////////////////////////////////////////////////////`
			`// CountWords`
			`//`
			`/////////////////////////////////////////////////////// JOEM 7-2000 //`
			`inline USHORT CountWords (const WCHAR* pszText)`
			`{`
			`int wordNum = 0;`
			`SPDBG_ASSERT (pszText);`

			`while (*pszText)`
			`{`
			`WSkipWhiteSpace(pszText);`
			`if (!*pszText)`
			`{`
			`break;`
			`}`

			`WSkipNonWhiteSpace(pszText);`
			`wordNum++;`
			`}`
			`return (USHORT) wordNum;`
			`}`

			`//////////////////////////////////////////////////////////////////////`
			`// SplitWords`
			`//`
			`// Text will be modified, wordList will be allocated with`
			`// wordCount WCHAR*`
			`//`
			`/////////////////////////////////////////////////////// JOEM 7-2000 //`
			`inline HRESULT SplitWords (WCHAR* text, WCHAR*** wordList, USHORT* wordCount)`
			`{`
			`SPDBG_FUNC( "SplitWords" );`
			`HRESULT hr = S_OK;`
			`ULONG i = 0;`

			`SPDBG_ASSERT (text);`
			`SPDBG_ASSERT (wordList);`
			`SPDBG_ASSERT (wordCount);`

			`*wordCount = CountWords (text);`

			`wordList = (WCHAR) calloc (wordCount, sizeof(**wordList));`
			`if ( !*wordList )`
			`{`
			`hr = E_OUTOFMEMORY;`
			`}`

			`if ( SUCCEEDED(hr) )`
			`{`
			`for (i=0; i<*wordCount; i++)`
			`{`
			`WSkipWhiteSpace (text);`
			`(*wordList)[i] = text;`
			`WSkipNonWhiteSpace (text);`
			`*text++ = L'\0';`
			`}`
			`}`

			`SPDBG_REPORT_ON_FAIL( hr );`
			`return hr;`
			`}`

			`//////////////////////////////////////////////////////////////////////`
			`// RemovePunctuation`
			`//`
			`// Text will be modified`
			`//`
			`/////////////////////////////////////////////////////// JOEM 8-2000 //`
			`inline HRESULT RemovePunctuation (WCHAR** wordList, USHORT* wordCount)`
			`{`
			`SPDBG_FUNC( "RemovePunctuation" );`
			`HRESULT hr = S_OK;`
			`WCHAR* pszWord = NULL;`
			`WCHAR* psz = NULL;`
			`USHORT i = 0;`
			`USHORT nextItem = 0;`
			`USHORT numSkipped = 0;`

			`SPDBG_ASSERT (wordList);`
			`SPDBG_ASSERT (wordCount);`

			`for ( i=0; i<*wordCount; i++ )`
			`{`

			// If the first char is ' or " or ` then get rid of it
			if ( !wcscspn(wordList[i], L"\"'`") )
			`{`
			`psz = wordList[i]+1;`
			`wcscpy(wordList[i], psz);`
			`psz = NULL;`
			`}`

			// If the last char is ' or " or ` or one of these .,;:?! then get rid of it.
			`// psz points to the last char`
			`psz = wordList[i] + wcslen(wordList[i]) - 1;`
			if ( !wcscspn(psz, L"\"'`.,;:?!") )
			`{`
			`psz[0] = L'\0';`
			`}`
			`}`

			`// reposition the list items, skipping empty strings`
			`for ( i=0; i<*wordCount; i++ )`
			`{`
			`if ( !wcslen(wordList[i]) )`
			`{`
			`nextItem = i+1;`
			`while ( nextItem < *wordCount && !wcslen(wordList[nextItem]) )`
			`{`
			`nextItem++;`
			`}`
			`if ( nextItem < *wordCount )`
			`{`
			`wordList[i] = wordList[nextItem];`
			`wordList[nextItem] = L"";`
			`}`
			`else`
			`{`
			`break; // out of items`
			`}`
			`}`
			`}`

			`*wordCount = i;`

			`SPDBG_REPORT_ON_FAIL( hr );`
			`return hr;`
			`}`

			`//////////////////////////////////////////////////////////////////////`
			`// AssembleText`
			`//`
			`// ppszText will be allocated.`
			`//`
			`/////////////////////////////////////////////////////// JOEM 7-2000 //`
			`inline HRESULT AssembleText(const int iStartWord, const int iEndWord, WCHAR ppszWordList, WCHAR ppszText)`
			`{`
			`SPDBG_FUNC( "AssembleText" );`
			`HRESULT hr = S_OK;`
			`int i = 0;`
			`int iStrLen = 0;`

			`for ( i=iStartWord; i<=iEndWord; i++ )`
			`{`
			`iStrLen += wcslen(ppszWordList[i]) + 1;`
			`}`

			`if ( iStrLen )`
			`{`
			`*ppszText = new WCHAR[iStrLen];`
			`if ( !*ppszText )`
			`{`
			`hr = E_OUTOFMEMORY;`
			`}`
			`else`
			`{`
			`(*ppszText)[0] = L'\0';`
			`for ( i=iStartWord; i<=iEndWord; i++ )`
			`{`
			`wcscat(*ppszText, ppszWordList[i]);`
			`if ( i < iEndWord )`
			`{`
			`wcscat(*ppszText, L" ");`
			`}`
			`}`
			`WStringUpperCase(*ppszText);`
			`}`
			`}`
			`SPDBG_REPORT_ON_FAIL( hr );`
			`return hr;`
			`}`

			`//////////////////////////////////////////////////////////////////////`
			`// RegularizeText`
			`//`
			`// Regularizes whitespace, optionally removing punctuation.`
			`//`
			`/////////////////////////////////////////////////////// JOEM 7-2000 //`
			`inline HRESULT RegularizeText(WCHAR* pszText, PUNC removePunc)`
			`{`
			`SPDBG_FUNC( "RegularizeText" );`
			`HRESULT hr = S_OK;`
			`WCHAR** wordList = NULL;`
			`WCHAR* pszNewText = NULL;`
			`USHORT wordCount = 0;`

			`if ( !pszText )`
			`{`
			`hr = E_INVALIDARG;`
			`}`

			`if ( SUCCEEDED(hr) )`
			`{`
			`hr = SplitWords (pszText, &wordList, &wordCount);`
			`}`

			`if ( SUCCEEDED(hr) && removePunc )`
			`{`
			`hr = RemovePunctuation(wordList, &wordCount);`
			`}`

			`if ( SUCCEEDED(hr) )`
			`{`
			`hr = AssembleText(0, wordCount-1, wordList, &pszNewText);`
			`}`

			`if ( SUCCEEDED(hr) && pszNewText )`
			`{`
			`WStringUpperCase(pszNewText);`
			`wcscpy(pszText, pszNewText);`
			`}`

			`if ( pszNewText )`
			`{`
			`delete [] pszNewText;`
			`pszNewText = NULL;`
			`}`

			`if ( wordList )`
			`{`
			`free(wordList);`
			`}`

			`SPDBG_REPORT_ON_FAIL( hr );`
			`return hr;`
			`}`

			`//////////////////////////////////////////////////////////////////////`
			`// FindUnicodeControlChar`
			`//`
			`/////////////////////////////////////////////////////// JOEM 7-2000 //`
			`inline WCHAR* FindUnicodeControlChar (WCHAR* pszText)`
			`{`
			`ULONG i = 0;`

			`while ( i<wcslen(pszText) && !iswcntrl(pszText[i]) )`
			`{`
			`i++;`
			`}`

			`if ( i == wcslen(pszText) )`
			`{`
			`return NULL;`
			`}`
			`else`
			`{`
			`return &pszText[i];`
			`}`
			`}`

			`//////////////////////////////////////////////////////////////////////`
			`// FileExist`
			`//`
			`//`
			`/////////////////////////////////////////////////////// JOEM 3-2000 //`
			`inline bool FileExist(const WCHAR *pszName)`
			`{`
			`SPDBG_FUNC( "FileExist" );`
			`FILE* fp;`

			`if ( !pszName \|\| !wcslen(pszName) \|\| ( (fp = _wfopen(pszName, L"r")) == NULL ) )`
			`{`
			`return false;`
			`}`
			`fclose (fp);`
			`return true;`
			`}`



			`#endif`