170 lines
4.3 KiB
C++
170 lines
4.3 KiB
C++
|
//+---------------------------------------------------------------------------
|
|||
|
//
|
|||
|
//
|
|||
|
// Lextable.hpp
|
|||
|
//
|
|||
|
// History:
|
|||
|
// created 7/99 aarayas
|
|||
|
//
|
|||
|
// Copyright (c) 1999 Microsoft Corporation
|
|||
|
//----------------------------------------------------------------------------
|
|||
|
#include "lextable.hpp"
|
|||
|
|
|||
|
//+---------------------------------------------------------------------------
|
|||
|
//
|
|||
|
// Function: IsUpperPunctW
|
|||
|
//
|
|||
|
// Synopsis: Returns true if wc is a punctuation character in the upper
|
|||
|
// unicode range
|
|||
|
//
|
|||
|
// Parameters:
|
|||
|
//
|
|||
|
// Modifies:
|
|||
|
//
|
|||
|
// History: created 7/99 aarayas
|
|||
|
//
|
|||
|
// Notes:
|
|||
|
//
|
|||
|
//----------------------------------------------------------------------------
|
|||
|
BOOL IsUpperPunctW(const WCHAR wc)
{
    // Every code point whose high byte is 0x20 (0x2000..0x20FF) is treated
    // as punctuation without consulting the list below.
    if ((wc & 0xff00) == 0x2000)
    {
        return TRUE;
    }

    // Individual sentence-ending / punctuation code points outside that page.
    switch (wc)
    {
        case 0x01C3:    // yet another Latin exclamation mark
        case 0x037E:    // Greek question mark
        case 0x03D7:    // labelled "greek question mark" by the original author
                        // NOTE(review): U+03D7 is GREEK KAI SYMBOL in current
                        // Unicode tables - confirm this entry is intended.
        case 0x055C:    // Armenian exclamation mark
        case 0x055E:    // Armenian question mark
        case 0x0589:    // Armenian period
        case 0x061F:    // Arabic question mark
        case 0x06d4:    // Arabic period
        // The next three values lie in 0x2000..0x20FF, so the range test
        // above has already returned for them; kept for documentation.
        case 0x2026:    // horizontal ellipsis
        case 0x2029:    // paragraph separator
        case 0x203C:    // double exclamation mark
        case 0x2762:    // heavy exclamation mark
        case 0x3002:    // ideographic period
        case 0xFE52:    // small period
        case 0xFE56:    // small question mark
        case 0xFE57:    // small exclamation mark
        case 0xFF01:    // fullwidth exclamation mark
        case 0xFF0E:    // fullwidth period
        case 0xFF1F:    // fullwidth question mark
        case 0xFF61:    // halfwidth ideographic period
            return TRUE;

        default:
            return FALSE;
    }
}
|
|||
|
|
|||
|
//+---------------------------------------------------------------------------
|
|||
|
//
|
|||
|
// Function: IsUpperWordDelimW
|
|||
|
//
|
|||
|
// Synopsis: figures out whether an upper unicode char is a word delimiter
|
|||
|
//
|
|||
|
// Parameters:
|
|||
|
//
|
|||
|
// Modifies:
|
|||
|
//
|
|||
|
// History: created 7/99 aarayas
|
|||
|
//
|
|||
|
// Notes:
|
|||
|
//
|
|||
|
//----------------------------------------------------------------------------
|
|||
|
BOOL IsUpperWordDelimW(WCHAR wc)
{
    // Any code point in 0x2000..0x200F counts as a word delimiter.
    if ((wc & 0xfff0) == 0x2000)
    {
        return TRUE;
    }

    // A few further delimiters that fall outside that 16-value block.
    switch (wc)
    {
        case 0x2013:    // en dash
        case 0x2014:    // em dash
        case 0x2026:    // ellipsis
            return TRUE;

        default:
            return FALSE;
    }
}
|
|||
|
|
|||
|
//+---------------------------------------------------------------------------
|
|||
|
//
|
|||
|
// Function: TWB_IsCharPunctW
|
|||
|
//
|
|||
|
// Synopsis: figures out whether the character is punctuation
|
|||
|
//
|
|||
|
// Parameters:
|
|||
|
//
|
|||
|
// Modifies:
|
|||
|
//
|
|||
|
// History: created 7/99 aarayas
|
|||
|
//
|
|||
|
// Notes:
|
|||
|
//
|
|||
|
//----------------------------------------------------------------------------
|
|||
|
BOOL TWB_IsCharPunctW(WCHAR ch)
{
    // Characters that INUPPERPAGES accepts are classified by the
    // explicit code-point rules in IsUpperPunctW.
    if (INUPPERPAGES(ch))
    {
        return IsUpperPunctW(ch);
    }

    // Otherwise look the character up in rgFlags, indexed by the character
    // cast to UCHAR. The raw masked value is returned, so a "true" result is
    // non-zero but not necessarily equal to TRUE.
    return rgFlags[(UCHAR) ch] & Lex_PunctFlag;
}
|
|||
|
|
|||
|
//+---------------------------------------------------------------------------
|
|||
|
//
|
|||
|
// Function: TWB_IsCharWordDelimW
|
|||
|
//
|
|||
|
// Synopsis: figures out whether the character is a word delimiter
|
|||
|
//
|
|||
|
// Parameters:
|
|||
|
//
|
|||
|
// Modifies:
|
|||
|
//
|
|||
|
// History: created 7/99 aarayas
|
|||
|
//
|
|||
|
// Notes:
|
|||
|
//
|
|||
|
//----------------------------------------------------------------------------
|
|||
|
BOOL TWB_IsCharWordDelimW(WCHAR ch)
{
    // Characters that INUPPERPAGES accepts are classified by the
    // explicit code-point rules in IsUpperWordDelimW.
    if (INUPPERPAGES(ch))
    {
        return IsUpperWordDelimW(ch);
    }

    // Otherwise test the space flag in rgPunctFlags, indexed by the character
    // cast to UCHAR. The raw masked value is returned, so a "true" result is
    // non-zero but not necessarily equal to TRUE.
    // NOTE(review): TWB_IsCharPunctW reads rgFlags while this function reads
    // rgPunctFlags - confirm the two different tables are intentional.
    return rgPunctFlags[(UCHAR) ch] & Lex_SpaceFlag;
}
|
|||
|
|
|||
|
//+---------------------------------------------------------------------------
|
|||
|
//
|
|||
|
// Function: IsThaiChar
|
|||
|
//
|
|||
|
// Synopsis: determine if the character is a Thai character
|
|||
|
//
|
|||
|
// Parameters:
|
|||
|
//
|
|||
|
// Modifies:
|
|||
|
//
|
|||
|
// History: created 7/99 aarayas
|
|||
|
//
|
|||
|
// Notes: 13/12/99 - take out Thai numbers as Thai Characters since
|
|||
|
// we want to consider them like english numbers.
|
|||
|
//
|
|||
|
//----------------------------------------------------------------------------
|
|||
|
bool IsThaiChar(const WCHAR wch)
|
|||
|
{
|
|||
|
return ( wch >= 0x0e01 && wch <= 0x0e59);
|
|||
|
}
|
|||
|
|
|||
|
//+---------------------------------------------------------------------------
|
|||
|
//
|
|||
|
// Function: IsThaiNumeric
|
|||
|
//
|
|||
|
// Synopsis: determine if the character is a Thai digit (0x0e50..0x0e59)
|
|||
|
//
|
|||
|
// Parameters:
|
|||
|
//
|
|||
|
// Modifies:
|
|||
|
//
|
|||
|
// History: created 5/00 aarayas
|
|||
|
//
|
|||
|
//----------------------------------------------------------------------------
|
|||
|
bool IsThaiNumeric(const WCHAR wch)
|
|||
|
{
|
|||
|
return ( wch >= 0x0e50 && wch <= 0x0e59);
|
|||
|
}
|