windows-nt/Source/XPSP1/NT/inetsrv/intlwb/thai2/sth/lextable.cpp
2020-09-26 16:20:57 +08:00

170 lines
4.3 KiB
C++

//+---------------------------------------------------------------------------
//
//
// Lextable.cpp
//
// History:
// created 7/99 aarayas
//
// ©1999 Microsoft Corporation
//----------------------------------------------------------------------------
#include "lextable.hpp"
//+---------------------------------------------------------------------------
//
// Function: IsUpperPunctW
//
// Synopsis: Returns true if wc is a punctuation character in the upper
// unicode range
//
// Parameters:
//
// Modifies:
//
// History: created 7/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
// Returns TRUE when wc is treated as punctuation by this module, FALSE
// otherwise.
//
// Two groups of code points qualify:
//   * every code point in U+2000..U+20FF -- the test masks off the low byte,
//     so the whole 0x20xx page matches;
//   * the fixed list of individual code points in the switch below.
BOOL IsUpperPunctW(const WCHAR wc)
{
    // Whole 0x20xx page.  This already covers U+2026 (horizontal ellipsis),
    // U+2029 (paragraph separator) and U+203C (double exclamation mark),
    // which is why they have no case labels of their own in the switch:
    // such labels could never be reached.
    if ((wc & 0xff00) == 0x2000)
    {
        return TRUE;
    }

    switch (wc)
    {
    case 0x01C3: // Latin letter that looks like an exclamation mark
                 // (Unicode name: LATIN LETTER RETROFLEX CLICK)
    case 0x037E: // Greek question mark
    case 0x03D7: // listed by the original author as a Greek question mark;
                 // NOTE(review): Unicode names U+03D7 GREEK KAI SYMBOL --
                 // confirm this entry is intended
    case 0x055C: // Armenian exclamation mark
    case 0x055E: // Armenian question mark
    case 0x0589: // Armenian period
    case 0x061F: // Arabic question mark
    case 0x06d4: // Arabic period
    case 0x2762: // Heavy exclamation mark
    case 0x3002: // Ideographic period
    case 0xFE52: // Small period
    case 0xFE56: // Small question mark
    case 0xFE57: // Small exclamation mark
    case 0xFF01: // Fullwidth exclamation mark
    case 0xFF0E: // Fullwidth period
    case 0xFF1F: // Fullwidth question mark
    case 0xFF61: // Halfwidth ideographic period
        return TRUE;
    }

    // Anything else is not punctuation as far as this function is concerned.
    return FALSE;
}
//+---------------------------------------------------------------------------
//
// Function: IsUpperWordDelimW
//
// Synopsis: figures out whether an upper unicode char is a word delimiter
//
// Parameters:
//
// Modifies:
//
// History: created 7/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
// Returns non-zero when wc is one of the word-delimiter code points this
// function recognises: anything in U+2000..U+200F, or one of three
// individually listed marks.
BOOL IsUpperWordDelimW(WCHAR wc)
{
    // Masking off the low nibble matches the sixteen code points
    // U+2000..U+200F in a single comparison.
    const bool fInLeadingBlock = ((wc & 0xfff0) == 0x2000);

    const bool fIsListedMark = (wc == 0x2013 ||  // en dash
                                wc == 0x2014 ||  // em dash
                                wc == 0x2026);   // ellipsis

    return fInLeadingBlock || fIsListedMark;
}
//+---------------------------------------------------------------------------
//
// Function: TWB_IsCharPunctW
//
// Synopsis: figures out whether character is a punctuation character
//
// Parameters:
//
// Modifies:
//
// History: created 7/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
// Punctuation test for any WCHAR.  Characters for which INUPPERPAGES(ch) is
// true are classified by IsUpperPunctW(); all others are looked up in
// rgFlags, indexed by ch truncated to UCHAR, and tested against
// Lex_PunctFlag.
// NOTE(review): INUPPERPAGES is defined outside this file -- presumably it
// is true when ch does not fit in one byte; confirm against lextable.hpp.
BOOL TWB_IsCharPunctW(WCHAR ch)
{
    if (INUPPERPAGES(ch))
    {
        return IsUpperPunctW(ch);
    }

    // Result is the raw masked flag byte: non-zero means punctuation.
    return rgFlags[(UCHAR) ch] & Lex_PunctFlag;
}
//+---------------------------------------------------------------------------
//
// Function: TWB_IsCharWordDelimW
//
// Synopsis: figures out whether character is a word delimiter
//
// Parameters:
//
// Modifies:
//
// History: created 7/99 aarayas
//
// Notes:
//
//----------------------------------------------------------------------------
// Word-delimiter test for any WCHAR.  Characters for which INUPPERPAGES(ch)
// is true are classified by IsUpperWordDelimW(); all others are looked up in
// rgPunctFlags, indexed by ch truncated to UCHAR, and tested against
// Lex_SpaceFlag.
// NOTE(review): TWB_IsCharPunctW reads rgFlags, whereas this function reads
// rgPunctFlags while masking with a Lex_* flag.  Both tables are declared
// outside this file, so the choice cannot be checked from here -- confirm
// that rgPunctFlags is the intended table.
BOOL TWB_IsCharWordDelimW(WCHAR ch)
{
    if (INUPPERPAGES(ch))
    {
        return IsUpperWordDelimW(ch);
    }

    // Result is the raw masked flag byte: non-zero means word delimiter.
    return rgPunctFlags[(UCHAR) ch] & Lex_SpaceFlag;
}
//+---------------------------------------------------------------------------
//
// Function: IsThaiChar
//
// Synopsis: determine if the character is a Thai character
//
// Parameters:
//
// Modifies:
//
// History: created 7/99 aarayas
//
// Notes: 13/12/99 - take out Thai numbers as Thai Characters since
// we want to consider them like english numbers.
//
//----------------------------------------------------------------------------
// Returns true when wch lies in the inclusive range U+0E01..U+0E59.
// NOTE(review): the header note for this function says Thai digits were
// taken out, yet the upper bound 0x0e59 still admits U+0E50..U+0E59 (the
// range IsThaiNumeric tests).  Behaviour is kept as-is; confirm which of the
// two is intended.
bool IsThaiChar(const WCHAR wch)
{
    // Reject anything outside the range, accept everything else.
    if (wch < 0x0e01 || wch > 0x0e59)
    {
        return false;
    }
    return true;
}
//+---------------------------------------------------------------------------
//
// Function: IsThaiNumeric
//
// Synopsis: determine if the character is a Thai digit (U+0E50..U+0E59)
//
// Parameters:
//
// Modifies:
//
// History: created 5/00 aarayas
//
//----------------------------------------------------------------------------
// Returns true when wch lies in the inclusive range U+0E50..U+0E59, i.e. the
// ten code points this function treats as Thai digits.
bool IsThaiNumeric(const WCHAR wch)
{
    // Below the first digit: not numeric.
    if (wch < 0x0e50)
    {
        return false;
    }
    // At or above the first digit: numeric only up to the last digit.
    return wch <= 0x0e59;
}