windows-nt/Source/XPSP1/NT/inetsrv/intlwb/enu/wordbreaker/propflags.h
2020-09-26 16:20:57 +08:00

361 lines
14 KiB
C++

////////////////////////////////////////////////////////////////////////////////
//
// Filename : PropFlags.h
// Purpose : properties definitions
//
// Project : WordBreakers
// Component: English word breaker
//
// Author : yairh
//
// Log:
//
// Jan 06 2000 yairh creation
// May 07 2000 dovh - const array generation:
// split PropArray.h => PropArray.h + PropFlags.h
// May 11 2000 dovh - Simplify GET_PROP to do double indexing always.
// Nov 11 2000 dovh - Special underscore treatment
// (Only added PROP_ALPHA_NUMERIC flag here)
//
////////////////////////////////////////////////////////////////////////////////
// Include guard; closed by the "#endif // _PROP_FLAGS_H_" at the end of this header.
#ifndef _PROP_FLAGS_H_
#define _PROP_FLAGS_H_
//
// Build-time switches.
//
// USE_WS_SENTINEL is defined here but is not referenced anywhere in this
// header; presumably it is consumed by the sources that include it - TODO
// confirm.  The commented-out #undef is the "off" setting.
//
#define USE_WS_SENTINEL
// #undef USE_WS_SENTINEL
//
// DECLARE_BYTE_ARRAY and DECLARE_ULONGLONG_ARRAY choose which definitions of
// GET_PROP / IS_WS / IS_EOS / IS_BREAKER are compiled in the "PROP_FLAGS
// MACROS" section of this header.  Both are forced off here, which selects the
// g_pPropArray based variants.  To enable one of them, swap its #undef for the
// commented-out #define next to it.
//
// #define DECLARE_BYTE_ARRAY
#undef DECLARE_BYTE_ARRAY
// #define DECLARE_ULONGLONG_ARRAY
#undef DECLARE_ULONGLONG_ARRAY
//
// TRACE_CHAR: table of 43 wide characters followed by a terminating L'\0'.
//
// NOTE(review): presumably one display character per property, used for trace
// output - confirm against the code that reads it.  The entries are not in
// PROP_* bit order (index 4 is L'N' while bit 4 is PROP_PERIOD), and there are
// 43 characters whereas WB_PROP_COUNT is 45.
//
const WCHAR TRACE_CHAR[] =
{
    L'S',  L'E',  L'U',  L'L',  L'N',  L'~',  L'!',  L'@',  L'#',  L'$',  L'%',
    L'-',  L'&',  L'*',  L'(',  L')',  L'-',  L'_',  L'=',  L'+',  L'\\', L'|',
    L'{',  L'}',  L'[',  L']',  L'\"', L'\'', L';',  L':',  L'?',  L'/',  L'<',
    L'>',  L'.',  L',',  L'w',  L'C',  L'T',  L'B',  L's',  L'X',  L'S',
    L'\0'
};
//
// NOTE: DO NOT CHANGE THE ORDER.
// UPDATE GEN_PROP_STRING_VALUE MACRO BELOW WHENEVER FLAGS DEFINITIONS CHANGE!
//
//
// Single-bit property flags.  Every PROP_* below occupies exactly one bit of a
// ULONGLONG and the shift count is its bit position; PROP_DEFAULT has no bit
// set.  Several names carry historical spellings (BRAKCET, TRANSPERENT,
// COMMERSIAL); they are part of the interface and must stay as they are.
//
#define PROP_DEFAULT                ((ULONGLONG)0)
// Bits 0 - 7
#define PROP_WS                     ((ULONGLONG)1 <<  0)
#define PROP_EOS                    ((ULONGLONG)1 <<  1)
#define PROP_UPPER_CASE             ((ULONGLONG)1 <<  2)
#define PROP_LOWER_CASE             ((ULONGLONG)1 <<  3)
#define PROP_PERIOD                 ((ULONGLONG)1 <<  4)
#define PROP_COMMA                  ((ULONGLONG)1 <<  5)
#define PROP_RESERVED_BREAKER       ((ULONGLONG)1 <<  6)
#define PROP_RESERVED               ((ULONGLONG)1 <<  7)
// Bits 8 - 15
#define PROP_NUMBER                 ((ULONGLONG)1 <<  8)
#define PROP_TILDE                  ((ULONGLONG)1 <<  9)
#define PROP_EXCLAMATION_MARK       ((ULONGLONG)1 << 10)
#define PROP_AT                     ((ULONGLONG)1 << 11)
#define PROP_POUND                  ((ULONGLONG)1 << 12)
#define PROP_DOLLAR                 ((ULONGLONG)1 << 13)
#define PROP_PERCENTAGE             ((ULONGLONG)1 << 14)
#define PROP_MINUS                  ((ULONGLONG)1 << 15)
// Bits 16 - 23
#define PROP_AND                    ((ULONGLONG)1 << 16)
#define PROP_ASTERISK               ((ULONGLONG)1 << 17)
#define PROP_LEFT_PAREN             ((ULONGLONG)1 << 18)
#define PROP_RIGHT_PAREN            ((ULONGLONG)1 << 19)
#define PROP_DASH                   ((ULONGLONG)1 << 20)
#define PROP_UNDERSCORE             ((ULONGLONG)1 << 21)
#define PROP_EQUAL                  ((ULONGLONG)1 << 22)
#define PROP_PLUS                   ((ULONGLONG)1 << 23)
// Bits 24 - 31
#define PROP_BACKSLASH              ((ULONGLONG)1 << 24)
#define PROP_OR                     ((ULONGLONG)1 << 25)
#define PROP_LEFT_CURLY_BRACKET     ((ULONGLONG)1 << 26)
#define PROP_RIGHT_CURLY_BRACKET    ((ULONGLONG)1 << 27)
#define PROP_LEFT_BRAKCET           ((ULONGLONG)1 << 28)
#define PROP_RIGHT_BRAKCET          ((ULONGLONG)1 << 29)
#define PROP_DOUBLE_QUOTE           ((ULONGLONG)1 << 30)
#define PROP_APOSTROPHE             ((ULONGLONG)1 << 31)
// Bits 32 - 39 (from here on the flags no longer fit in 32 bits)
#define PROP_SEMI_COLON             ((ULONGLONG)1 << 32)
#define PROP_COLON                  ((ULONGLONG)1 << 33)
#define PROP_QUESTION_MARK          ((ULONGLONG)1 << 34)
#define PROP_SLASH                  ((ULONGLONG)1 << 35)
#define PROP_LT                     ((ULONGLONG)1 << 36)
#define PROP_GT                     ((ULONGLONG)1 << 37)
#define PROP_W                      ((ULONGLONG)1 << 38)
#define PROP_CURRENCY               ((ULONGLONG)1 << 39)
// Bits 40 - 44
#define PROP_BREAKER                ((ULONGLONG)1 << 40)
#define PROP_TRANSPERENT            ((ULONGLONG)1 << 41)
#define PROP_NBS                    ((ULONGLONG)1 << 42)
#define PROP_ALPHA_XDIGIT           ((ULONGLONG)1 << 43)
#define PROP_COMMERSIAL_SIGN        ((ULONGLONG)1 << 44)
// Number of single-bit flags defined above (bits 0 through 44).
#define WB_PROP_COUNT               45
//
// The following is the contents of the GEN_PROP_STRING array
// used by the array generator.
//
// NOTE: DO NOT CHANGE THE ORDER.
// UPDATE MACRO WHENEVER FLAGS DEFINITIONS CHANGE TO REFLECT CHANGES!
//
// extern const WCHAR* GEN_PROP_STRING[ WB_PROP_COUNT ];
//
//
// Brace-enclosed initializer listing the name of every single-bit PROP_* flag
// as a wide string, in bit order (entry i names the flag with bit i).  It must
// contain exactly WB_PROP_COUNT (45) entries.
//
// Every entry except the last needs a trailing comma: adjacent wide string
// literals are concatenated by the compiler, so a missing comma silently fuses
// two names into one entry and shortens the list.
//
#define GEN_PROP_STRING_VALUE \
{ \
    /* bits 0 - 7 */ \
    L"PROP_WS", \
    L"PROP_EOS", \
    L"PROP_UPPER_CASE", \
    L"PROP_LOWER_CASE", \
    L"PROP_PERIOD", \
    L"PROP_COMMA", \
    L"PROP_RESERVED_BREAKER", \
    L"PROP_RESERVED", \
    /* bits 8 - 15 */ \
    L"PROP_NUMBER", \
    L"PROP_TILDE", \
    L"PROP_EXCLAMATION_MARK", \
    L"PROP_AT", \
    L"PROP_POUND", \
    L"PROP_DOLLAR", \
    L"PROP_PERCENTAGE", \
    L"PROP_MINUS", \
    /* bits 16 - 23 */ \
    L"PROP_AND", \
    L"PROP_ASTERISK", \
    L"PROP_LEFT_PAREN", \
    L"PROP_RIGHT_PAREN", \
    L"PROP_DASH", \
    L"PROP_UNDERSCORE", \
    L"PROP_EQUAL", \
    L"PROP_PLUS", \
    /* bits 24 - 31 */ \
    L"PROP_BACKSLASH", \
    L"PROP_OR", \
    L"PROP_LEFT_CURLY_BRACKET", \
    L"PROP_RIGHT_CURLY_BRACKET", \
    L"PROP_LEFT_BRAKCET", \
    L"PROP_RIGHT_BRAKCET", \
    L"PROP_DOUBLE_QUOTE", \
    L"PROP_APOSTROPHE", \
    /* bits 32 - 39 */ \
    L"PROP_SEMI_COLON", \
    L"PROP_COLON", \
    L"PROP_QUESTION_MARK", \
    L"PROP_SLASH", \
    L"PROP_LT", \
    L"PROP_GT", \
    L"PROP_W", \
    L"PROP_CURRENCY", \
    /* bits 40 - 44 */ \
    L"PROP_BREAKER", \
    L"PROP_TRANSPERENT", \
    L"PROP_NBS", \
    L"PROP_ALPHA_XDIGIT", \
    L"PROP_COMMERSIAL_SIGN" \
}
//
// Composite character classes, each the bitwise OR of single-bit PROP_* flags.
//
#define PROP_ALPHA          (PROP_UPPER_CASE | PROP_LOWER_CASE)
#define PROP_ALPHA_NUMERIC  (PROP_ALPHA | PROP_NUMBER)
#define PROP_XDIGIT         (PROP_NUMBER | PROP_ALPHA_XDIGIT)
#define PROP_DATE_SEPERATOR (PROP_PERIOD | PROP_DASH | PROP_SLASH)
//
// Breaker classes.  Operands are listed in ascending bit order.
// PROP_DEFAULT_BREAKER is the union of the first and second level sets.
//
#define PROP_FIRST_LEVEL_BREAKER \
    (PROP_WS                  | PROP_COMMA                | \
     PROP_EXCLAMATION_MARK    | PROP_ASTERISK             | \
     PROP_LEFT_PAREN          | PROP_RIGHT_PAREN          | \
     PROP_EQUAL               | PROP_BACKSLASH            | \
     PROP_OR                  | \
     PROP_LEFT_CURLY_BRACKET  | PROP_RIGHT_CURLY_BRACKET  | \
     PROP_LEFT_BRAKCET        | PROP_RIGHT_BRAKCET        | \
     PROP_DOUBLE_QUOTE        | PROP_SEMI_COLON           | \
     PROP_QUESTION_MARK       | PROP_SLASH                | \
     PROP_LT                  | PROP_GT                   | \
     PROP_BREAKER)
#define PROP_SECOND_LEVEL_BREAKER \
    (PROP_PERIOD     | PROP_TILDE  | \
     PROP_AT         | PROP_POUND  | \
     PROP_DOLLAR     | PROP_PERCENTAGE | \
     PROP_AND        | PROP_DASH   | \
     PROP_PLUS       | PROP_COLON)
#define PROP_DEFAULT_BREAKER \
    (PROP_FIRST_LEVEL_BREAKER | PROP_SECOND_LEVEL_BREAKER)
//
// Hyphenation
//
// Each *_PUNCT_HEAD / *_PUNCT_TAIL macro is a bitwise OR of PROP_* flags.
// NOTE(review): presumably HEAD is the punctuation accepted before a token of
// the given kind and TAIL the punctuation accepted after it - confirm against
// the tokenizer that consumes these masks.
#define HYPHENATION_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | \
     PROP_LEFT_PAREN | PROP_NBS)
#define HYPHENATION_PUNCT_TAIL \
    (PROP_SEMI_COLON       | PROP_COLON         | PROP_COMMA | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN      | PROP_PERIOD        | PROP_NBS)
//
// Abbreviation, acronym
//
#define ACRONYM_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | \
     PROP_LEFT_PAREN | PROP_NBS)
// Same as the hyphenation tail but without PROP_PERIOD.
#define ACRONYM_PUNCT_TAIL \
    (PROP_SEMI_COLON       | PROP_COLON         | PROP_COMMA | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN      | PROP_NBS)
// The abbreviation masks are the acronym masks plus PROP_APOSTROPHE.
#define ABBREVIATION_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | \
     PROP_LEFT_PAREN | PROP_NBS   | PROP_APOSTROPHE)
#define ABBREVIATION_PUNCT_TAIL \
    (PROP_SEMI_COLON       | PROP_COLON         | PROP_COMMA | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN      | PROP_NBS           | PROP_APOSTROPHE)
#define ABBREVIATION_EOS \
    (PROP_SEMI_COLON       | PROP_COLON | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | PROP_NBS)
#define SPECIAL_ABBREVIATION_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | \
     PROP_LEFT_PAREN | PROP_NBS)
#define SPECIAL_ABBREVIATION_PUNCT_TAIL \
    (PROP_SEMI_COLON       | PROP_COLON         | PROP_COMMA | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN      | PROP_PERIOD        | PROP_NBS)
//
// Parenthesis
//
#define PAREN_PUNCT_TAIL \
    (PROP_SEMI_COLON       | PROP_COLON         | PROP_COMMA | \
     PROP_PERIOD           | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_NBS              | PROP_APOSTROPHE)
#define PAREN_PUNCT_HEAD \
    (PROP_SEMI_COLON       | PROP_COLON         | PROP_COMMA | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_NBS              | PROP_APOSTROPHE)
//
// Clitics
// (the tail macro is spelled CLITICS_PUNC_TAIL, without the second 'T')
//
#define CLITICS_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | \
     PROP_LEFT_PAREN | PROP_NBS)
#define CLITICS_PUNC_TAIL \
    (PROP_SEMI_COLON       | PROP_COLON         | PROP_COMMA | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN      | PROP_PERIOD        | PROP_NBS)
//
// Numbers date time
//
// Masks shared by numbers, dates and times.  Besides the usual separators the
// head set has the opening bracket flags and the tail set has the closing
// bracket flags plus PROP_PERCENTAGE.
#define NUM_DATE_TIME_PUNCT_HEAD \
    (PROP_SEMI_COLON   | PROP_COMMA              | PROP_COLON | \
     PROP_LEFT_PAREN   | \
     PROP_LEFT_BRAKCET | PROP_LEFT_CURLY_BRACKET | \
     PROP_NBS)
#define NUM_DATE_TIME_PUNCT_TAIL \
    (PROP_SEMI_COLON       | PROP_COLON               | PROP_COMMA | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK       | \
     PROP_RIGHT_PAREN      | PROP_PERIOD              | \
     PROP_RIGHT_BRAKCET    | PROP_RIGHT_CURLY_BRACKET | \
     PROP_NBS              | PROP_PERCENTAGE)
// Extra flag for times and dates: PROP_APOSTROPHE on either side.
// NOTE(review): presumably OR-ed with the NUM_DATE_TIME_* masks by the
// caller - confirm.
#define TIME_ADDITIONAL_PUNCT_HEAD  (PROP_APOSTROPHE)
#define TIME_ADDITIONAL_PUNCT_TAIL  (PROP_APOSTROPHE)
#define DATE_ADDITIONAL_PUNCT_HEAD  (PROP_APOSTROPHE)
#define DATE_ADDITIONAL_PUNCT_TAIL  (PROP_APOSTROPHE)
//
// Currency
//
#define CURRENCY_PUNCT_HEAD \
    (PROP_SEMI_COLON   | PROP_COMMA              | PROP_COLON | \
     PROP_LEFT_PAREN   | \
     PROP_LEFT_BRAKCET | PROP_LEFT_CURLY_BRACKET | \
     PROP_APOSTROPHE   | PROP_NBS)
#define CURRENCY_PUNCT_TAIL \
    (PROP_SEMI_COLON       | PROP_COLON               | PROP_COMMA | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK       | \
     PROP_RIGHT_PAREN      | PROP_PERIOD              | \
     PROP_RIGHT_BRAKCET    | PROP_RIGHT_CURLY_BRACKET | \
     PROP_APOSTROPHE       | PROP_NBS)
//
// Misc
//
// Head / tail masks for miscellaneous tokens.
#define MISC_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | \
     PROP_LEFT_PAREN | PROP_NBS)
#define MISC_PUNCT_TAIL \
    (PROP_SEMI_COLON       | PROP_COLON         | PROP_COMMA | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN      | PROP_PERIOD        | PROP_NBS)
//
// Commercial sign (the macro names keep the historical "COMMERSIAL" spelling)
//
#define COMMERSIAL_SIGN_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | \
     PROP_LEFT_PAREN | PROP_NBS)
#define COMMERSIAL_SIGN_PUNCT_TAIL \
    (PROP_SEMI_COLON       | PROP_COLON         | PROP_COMMA | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN      | PROP_PERIOD        | PROP_NBS)
//
// EOS
// NOTE(review): presumably the characters allowed to trail an end-of-sentence
// mark - confirm against the caller.
//
#define EOS_SUFFIX \
    (PROP_WS                  | \
     PROP_RIGHT_BRAKCET       | PROP_RIGHT_PAREN | \
     PROP_RIGHT_CURLY_BRACKET | \
     PROP_APOSTROPHE          | PROP_NBS)
//
// Default: every default breaker plus NBS, underscore and apostrophe; the
// head and tail masks are identical.
//
#define SIMPLE_PUNCT_HEAD \
    (PROP_NBS | PROP_UNDERSCORE | PROP_DEFAULT_BREAKER | PROP_APOSTROPHE)
#define SIMPLE_PUNCT_TAIL \
    (PROP_NBS | PROP_UNDERSCORE | PROP_DEFAULT_BREAKER | PROP_APOSTROPHE)
// NOTE(review): presumably the capacity of the ULONGLONG flag word in bits;
// only WB_PROP_COUNT of them are assigned - confirm.
#define MAX_NUM_PROP 64
//
// PROP_FLAGS MACROS:
//
#ifndef DECLARE_ULONGLONG_ARRAY
#define GET_PROP(wch) \
( g_pPropArray->m_apCodePage[wch >> 8][(UCHAR)wch] )
#ifdef DECLARE_BYTE_ARRAY
extern const BYTE g_BytePropFlagArray[ ];
#define IS_WS(wch) (g_BytePropFlagArray[wch] & PROP_WS)
#define IS_EOS(wch) (g_BytePropFlagArray[wch] & PROP_EOS)
#define IS_BREAKER(wch) (g_BytePropFlagArray[wch] & PROP_RESERVED_BREAKER)
#else
#define IS_WS(wch) (GET_PROP(wch).m_ulFlag & PROP_WS)
#define IS_EOS(wch) (GET_PROP(wch).m_ulFlag & PROP_EOS)
#define IS_BREAKER(wch) (GET_PROP(wch).m_ulFlag & PROP_DEFAULT_BREAKER)
#endif // DECLARE_BYTE_ARRAY
#else
class CPropFlag;
extern const ULONGLONG g_UllPropFlagArray[ ];
#define GET_PROP(wch) (g_PropFlagArray[wch])
#define IS_WS(wch) (g_UllPropFlagArray[wch] & PROP_WS)
#define IS_EOS(wch) (g_UllPropFlagArray[wch] & PROP_EOS)
#define IS_BREAKER(wch) (g_UllPropFlagArray[wch] & PROP_DEFAULT_BREAKER)
#endif // DECLARE_ULONGLONG_ARRAY
//
// Flag tests on a property object, i.e. anything with an m_ulFlag member.
//
// Each macro evaluates to the masked flag bits, not to 0/1.  Several masks use
// bits above bit 31 (for example PROP_COLON is bit 33 and PROP_W is bit 38),
// so test the result in a boolean context instead of narrowing it to 32 bits.
//
// The prop argument is parenthesized so that expression arguments such as
// HAS_PROP_ALPHA(*pProp) or TEST_PROP(cond ? a : b, mask) expand correctly.
//
#define HAS_PROP_ALPHA(prop) ((prop).m_ulFlag & PROP_ALPHA)
#define HAS_PROP_EXTENDED_ALPHA(prop) ((prop).m_ulFlag & (PROP_ALPHA | PROP_TRANSPERENT))
#define HAS_PROP_UPPER_CASE(prop) ((prop).m_ulFlag & PROP_UPPER_CASE)
#define HAS_PROP_LOWER_CASE(prop) ((prop).m_ulFlag & PROP_LOWER_CASE)
#define HAS_PROP_NUMBER(prop) ((prop).m_ulFlag & PROP_NUMBER)
#define HAS_PROP_CURRENCY(prop) ((prop).m_ulFlag & PROP_CURRENCY)
#define HAS_PROP_LEFT_PAREN(prop) ((prop).m_ulFlag & PROP_LEFT_PAREN)
#define HAS_PROP_RIGHT_PAREN(prop) ((prop).m_ulFlag & PROP_RIGHT_PAREN)
#define HAS_PROP_APOSTROPHE(prop) ((prop).m_ulFlag & PROP_APOSTROPHE)
#define HAS_PROP_BACKSLASH(prop) ((prop).m_ulFlag & PROP_BACKSLASH)
#define HAS_PROP_SLASH(prop) ((prop).m_ulFlag & PROP_SLASH)
#define HAS_PROP_PERIOD(prop) ((prop).m_ulFlag & PROP_PERIOD)
#define HAS_PROP_COMMA(prop) ((prop).m_ulFlag & PROP_COMMA)
#define HAS_PROP_COLON(prop) ((prop).m_ulFlag & PROP_COLON)
#define HAS_PROP_DASH(prop) ((prop).m_ulFlag & PROP_DASH)
#define HAS_PROP_W(prop) ((prop).m_ulFlag & PROP_W)
// True when no flag is set at all, or when at least one of
// ALPHA / TRANSPERENT / W / ALPHA_XDIGIT is set and nothing outside that set
// is.  prop is evaluated up to three times; pass an expression without side
// effects.
#define IS_PROP_SIMPLE(prop) \
    (!(prop).m_ulFlag || \
    (((prop).m_ulFlag & (PROP_ALPHA | PROP_TRANSPERENT | PROP_W | PROP_ALPHA_XDIGIT)) && \
    !((prop).m_ulFlag & ~(PROP_ALPHA | PROP_TRANSPERENT | PROP_W | PROP_ALPHA_XDIGIT))))
// Bits of prop selected by the mask i.
#define TEST_PROP(prop, i) ((prop).m_ulFlag & (i))
// Bits that prop1 and prop2 have in common.
#define TEST_PROP1(prop1, prop2) ((prop1).m_ulFlag & (prop2).m_ulFlag)
#endif // _PROP_FLAGS_H_