windows-nt/Source/XPSP1/NT/base/win32/winnls/string.c

3746 lines
154 KiB
C
Raw Normal View History

2020-09-26 03:20:57 -05:00
/*++
Copyright (c) 1991-2000, Microsoft Corporation All rights reserved.
Module Name:
string.c
Abstract:
This file contains functions that deal with characters and strings.
APIs found in this file:
CompareStringW
GetStringTypeExW
GetStringTypeW
Revision History:
05-31-91 JulieB Created.
--*/
//
// Include Files.
//
#include "nls.h"
#include "jamo.h"
//
// Constant Declarations.
//
//
// State Table.
//
// Each state is a single bit so the states can be OR'ed together in one
// state word and cleared individually.
#define STATE_DW 0x1 // normal diacritic weight state
#define STATE_REVERSE_DW 0x2 // reverse diacritic weight state
#define STATE_CW 0x4 // case weight state
#define STATE_JAMO_WEIGHT 0x8 // jamo weight state
//
// Sentinel values used to mark a weight as invalid.
//
#define CMP_INVALID_WEIGHT 0xFFFFFFFF
#define CMP_INVALID_FAREAST 0xFFFF0000
#define CMP_INVALID_UW 0xFFFF
//
// Forward Declarations.
//
//
// LongCompareStringW
//
// Forward declaration only; the definition is not part of this section.
// CompareStringW hands the comparison off to this routine whenever its
// own fast path cannot be used, and returns this routine's result
// directly.  At that call site a count of -2 is passed to mean "the
// string is null-terminated".
//
int
LongCompareStringW(
PLOC_HASH pHashN, // ptr to LOC hash node
DWORD dwCmpFlags, // comparison flags (LOCALE_USE_CP_ACP already stripped by CompareStringW)
LPCWSTR lpString1, // first string
int cchCount1, // length of string 1, or -2 if null-terminated
LPCWSTR lpString2, // second string
int cchCount2, // length of string 2, or -2 if null-terminated
BOOL fModify); // CompareStringW passes IS_KOREAN(Locale) here
//
// FindJamoDifference
//
// Forward declaration only; the definition is not part of this section.
// CompareStringW calls it when a JAMO_SPECIAL script member is seen.
// As used there:
//   - the return value is stored in CompareStringW's "continue the
//     comparison loop" flag,
//   - *WhichJamo is checked right after the call and, if non-zero, is
//     returned as the final comparison result,
//   - *ppString1 / *ppString2 are copied back into the caller's string
//     pointers after the call, so they are presumably advanced here
//     (TODO confirm against the definition).
// CompareStringW passes -2 for both counts (null-terminated strings) and
// throw-away counter variables for ctr1/ctr2.
//
int
FindJamoDifference(
PLOC_HASH pHashN, // ptr to LOC hash node
LPCWSTR* ppString1, // in/out: current position in string 1
int* ctr1, // counter for string 1
int cchCount1, // count for string 1 (-2 from CompareStringW)
DWORD* pWeight1, // weight of current char in string 1
LPCWSTR* ppString2, // in/out: current position in string 2
int* ctr2, // counter for string 2
int cchCount2, // count for string 2 (-2 from CompareStringW)
DWORD* pWeight2, // weight of current char in string 2
LPCWSTR* pLastJamo, // caller-kept pointer; CompareStringW initializes it to the start of string 1
WORD* uw1, // unicode weight 1
WORD* uw2, // unicode weight 2
int* pState, // caller's state word
int* WhichJamo, // out: non-zero result ends the comparison
BOOL fModify); // flag to use modified script member weights
//-------------------------------------------------------------------------//
// INTERNAL MACROS //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
// NOT_END_STRING
//
// Checks to see if the search has NOT yet reached the end of the string.
// It evaluates to TRUE if the counter is not at zero (counting backwards)
// and the null termination has not been reached (the null check only
// applies if -2 was passed in the count parameter).
//
//   ct    - remaining character count
//   ptr   - pointer to the current character
//   cchIn - count originally supplied; -2 means "null-terminated"
//
// Every argument is parenthesized in the expansion so that compound
// expressions (e.g. "a & b") keep their intended grouping.
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define NOT_END_STRING(ct, ptr, cchIn) \
(((ct) != 0) && (!((*(ptr) == 0) && ((cchIn) == -2))))
////////////////////////////////////////////////////////////////////////////
//
// AT_STRING_END
//
// Checks to see if the pointer is at the end of the string.
// It evaluates to TRUE if the counter is zero or if the null termination
// has been reached (the null check only applies if -2 was passed in the
// count parameter).  This is the logical negation of NOT_END_STRING.
//
//   ct    - remaining character count
//   ptr   - pointer to the current character
//   cchIn - count originally supplied; -2 means "null-terminated"
//
// Every argument is parenthesized in the expansion so that compound
// expressions (e.g. "a & b") keep their intended grouping.
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define AT_STRING_END(ct, ptr, cchIn) \
(((ct) == 0) || ((*(ptr) == 0) && ((cchIn) == -2)))
////////////////////////////////////////////////////////////////////////////
//
// REMOVE_STATE
//
// Removes the given state bit(s) from the state word.  This should only
// be called when the state should not be entered for the remainder of
// the comparison.
//
// The macro operates on a variable named "State" that must be in scope
// at the point of use.  The argument is parenthesized before it is
// complemented so that a combination such as (STATE_DW | STATE_CW)
// clears every listed bit.
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define REMOVE_STATE(value) (State &= ~(value))
////////////////////////////////////////////////////////////////////////////
//
// POINTER_FIXUP
//
// Steps both string pointers forward by one character.  Before stepping,
// a string that is currently being read out of an expansion temporary
// buffer (its pSaveN is non-NULL) has its expansion count decremented;
// when that count reaches zero the pointer is switched back to the saved
// position in the real string and the saved pointer is cleared.
//
// Uses these variables from the enclosing scope:
//   pString1, pSave1, cExpChar1, pString2, pSave2, cExpChar2
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define POINTER_FIXUP() \
{ \
    /* \
     * String 1: leave the expansion buffer once it is used up, \
     * then advance. \
     */ \
    if ((pSave1 != NULL) && (--cExpChar1 == 0)) \
    { \
        pString1 = pSave1; \
        pSave1 = NULL; \
    } \
    pString1++; \
 \
    /* \
     * String 2: leave the expansion buffer once it is used up, \
     * then advance. \
     */ \
    if ((pSave2 != NULL) && (--cExpChar2 == 0)) \
    { \
        pString2 = pSave2; \
        pSave2 = NULL; \
    } \
    pString2++; \
}
////////////////////////////////////////////////////////////////////////////
//
// SCAN_LONGER_STRING
//
// Scans the remainder of the longer string and then ALWAYS returns from
// the enclosing function (every path through the macro ends in a return).
//
//   ct    - remaining count for the longer string; must be a modifiable
//           variable (it is decremented)
//   ptr   - pointer into the longer string; must be a modifiable variable
//           (it is advanced)
//   cchIn - original count for that string (-2 means null-terminated,
//           see NOT_END_STRING)
//   ret   - value to return as soon as a character that may not be
//           ignored is found; it is only evaluated at that point
//
// If only ignorable characters remain, the previously recorded
// differences decide the result, in this order: diacritic, case, extra
// weights (4 and 5 only when diacritics are not ignored, then 6 and 7),
// jamo, punctuation.  If none is set, CSTR_EQUAL is returned.
//
// Uses these variables from the enclosing scope: pHashN, Weight1 (which
// is overwritten), fIgnoreDiacritic, fIgnoreSymbol, WhichDiacritic,
// WhichCase, WhichExtra, WhichJamo, WhichPunct1, WhichPunct2.
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define SCAN_LONGER_STRING( ct, \
ptr, \
cchIn, \
ret ) \
{ \
/* \
* Search through the rest of the longer string to make sure \
* all characters are not to be ignored. If find a character that \
* should not be ignored, return the given return value immediately. \
* \
* The only exception to this is when a nonspace mark is found. If \
* another DW difference has been found earlier, then use that. \
*/ \
while (NOT_END_STRING(ct, ptr, cchIn)) \
{ \
Weight1 = GET_DWORD_WEIGHT(pHashN, *ptr); \
switch (GET_SCRIPT_MEMBER(&Weight1)) \
{ \
case ( UNSORTABLE ): \
{ \
break; \
} \
case ( NONSPACE_MARK ): \
{ \
if ((!fIgnoreDiacritic) && (!WhichDiacritic)) \
{ \
return (ret); \
} \
break; \
} \
case ( PUNCTUATION ) : \
case ( SYMBOL_1 ) : \
case ( SYMBOL_2 ) : \
case ( SYMBOL_3 ) : \
case ( SYMBOL_4 ) : \
case ( SYMBOL_5 ) : \
{ \
if (!fIgnoreSymbol) \
{ \
return (ret); \
} \
break; \
} \
case ( EXPANSION ) : \
case ( FAREAST_SPECIAL ) : \
case ( JAMO_SPECIAL ) : \
case ( EXTENSION_A ) : \
default : \
{ \
return (ret); \
} \
} \
\
/* \
* Advance pointer and decrement counter. \
*/ \
ptr++; \
ct--; \
} \
\
/* \
* Need to check diacritic, case, extra, and special weights for \
* final return value. Still could be equal if the longer part of \
* the string contained only characters to be ignored. \
* \
* NOTE: The following checks MUST REMAIN IN THIS ORDER: \
* Diacritic, Case, Extra, Jamo, Punctuation. \
*/ \
if (WhichDiacritic) \
{ \
return (WhichDiacritic); \
} \
if (WhichCase) \
{ \
return (WhichCase); \
} \
if (WhichExtra) \
{ \
if (!fIgnoreDiacritic) \
{ \
if (GET_WT_FOUR(&WhichExtra)) \
{ \
return (GET_WT_FOUR(&WhichExtra)); \
} \
if (GET_WT_FIVE(&WhichExtra)) \
{ \
return (GET_WT_FIVE(&WhichExtra)); \
} \
} \
if (GET_WT_SIX(&WhichExtra)) \
{ \
return (GET_WT_SIX(&WhichExtra)); \
} \
if (GET_WT_SEVEN(&WhichExtra)) \
{ \
return (GET_WT_SEVEN(&WhichExtra)); \
} \
} \
if (WhichJamo) \
{ \
return (WhichJamo); \
} \
if (WhichPunct1) \
{ \
return (WhichPunct1); \
} \
if (WhichPunct2) \
{ \
return (WhichPunct2); \
} \
\
return (CSTR_EQUAL); \
}
////////////////////////////////////////////////////////////////////////////
//
// QUICK_SCAN_LONGER_STRING
//
// Scans the remainder of the longer string and then ALWAYS returns from
// the enclosing function (every path through the macro ends in a return).
// Assumes that both strings are null-terminated, so no counters are used.
//
//   ptr - pointer into the longer string; must be a modifiable variable
//         (it is advanced)
//   ret - value to return as soon as a character that may not be ignored
//         is found; it is only evaluated at that point
//
// Unlike SCAN_LONGER_STRING, no ignore flags are consulted: only
// UNSORTABLE characters, and NONSPACE_MARK characters when a diacritic
// difference has already been recorded, are skipped.
//
// If only such characters remain, the previously recorded differences
// decide the result, in this order: diacritic, case, extra weights
// (4, 5, 6, 7), jamo, punctuation.  If none is set, CSTR_EQUAL is
// returned.
//
// Uses these variables from the enclosing scope: pHashN, WhichDiacritic,
// WhichCase, WhichExtra, WhichJamo, WhichPunct1, WhichPunct2.
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define QUICK_SCAN_LONGER_STRING( ptr, \
ret ) \
{ \
/* \
* Search through the rest of the longer string to make sure \
* all characters are not to be ignored. If find a character that \
* should not be ignored, return the given return value immediately. \
* \
* The only exception to this is when a nonspace mark is found. If \
* another DW difference has been found earlier, then use that. \
*/ \
while (*ptr != 0) \
{ \
switch (GET_SCRIPT_MEMBER(&(pHashN->pSortkey[*ptr]))) \
{ \
case ( UNSORTABLE ): \
{ \
break; \
} \
case ( NONSPACE_MARK ): \
{ \
if (!WhichDiacritic) \
{ \
return (ret); \
} \
break; \
} \
default : \
{ \
return (ret); \
} \
} \
\
/* \
* Advance pointer. \
*/ \
ptr++; \
} \
\
/* \
* Need to check diacritic, case, extra, and special weights for \
* final return value. Still could be equal if the longer part of \
* the string contained only unsortable characters. \
* \
* NOTE: The following checks MUST REMAIN IN THIS ORDER: \
* Diacritic, Case, Extra, Jamo, Punctuation. \
*/ \
if (WhichDiacritic) \
{ \
return (WhichDiacritic); \
} \
if (WhichCase) \
{ \
return (WhichCase); \
} \
if (WhichExtra) \
{ \
if (GET_WT_FOUR(&WhichExtra)) \
{ \
return (GET_WT_FOUR(&WhichExtra)); \
} \
if (GET_WT_FIVE(&WhichExtra)) \
{ \
return (GET_WT_FIVE(&WhichExtra)); \
} \
if (GET_WT_SIX(&WhichExtra)) \
{ \
return (GET_WT_SIX(&WhichExtra)); \
} \
if (GET_WT_SEVEN(&WhichExtra)) \
{ \
return (GET_WT_SEVEN(&WhichExtra)); \
} \
} \
if (WhichJamo) \
{ \
return (WhichJamo); \
} \
if (WhichPunct1) \
{ \
return (WhichPunct1); \
} \
if (WhichPunct2) \
{ \
return (WhichPunct2); \
} \
\
return (CSTR_EQUAL); \
}
////////////////////////////////////////////////////////////////////////////
//
// GET_FAREAST_WEIGHT
//
// Returns the weight for the far east special case in "wt". This currently
// includes the Cho-on, the Repeat, and the Kana characters.
//
//   wt      - in/out: weight of the current character; its unicode
//             weight and case weight are rewritten, or the whole value is
//             set to CMP_INVALID_FAREAST when no usable previous
//             character exists
//   uw      - out: resulting unicode weight (CMP_INVALID_UW on failure)
//   mask    - mask applied to each previous character's weight
//   pBegin  - start of the string; the backward scan stops here
//   pCur    - pointer to the current character
//   ExtraWt - out: extra weights 4 through 7 (reset to 0 first, and again
//             on failure)
//   fModify - passed through to MAKE_UNICODE_WT / GET_UNICODE_MOD
//
// For Repeat and Cho-On characters (AW <= MAX_SPECIAL_AW) the macro walks
// backwards from pCur, skipping UNSORTABLE / NONSPACE_MARK characters and
// other Repeat / Cho-On characters, until it finds a character to take
// the unicode weight from.  An EXPANSION character, or running off the
// start of the string, leaves the weight invalid.
//
// Uses pHashN from the enclosing scope.
//
// NOTE(review): the scan compares (pCur - ct) against pBegin, which
// presumes both point into the same buffer - confirm this holds for
// callers that are reading from an expansion temporary buffer.
//
// 08-19-93 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define GET_FAREAST_WEIGHT( wt, \
uw, \
mask, \
pBegin, \
pCur, \
ExtraWt, \
fModify ) \
{ \
int ct; /* loop counter */ \
BYTE PrevSM; /* previous script member value */ \
BYTE PrevAW; /* previous alphanumeric value */ \
BYTE PrevCW; /* previous case value */ \
BYTE AW; /* alphanumeric value */ \
BYTE CW; /* case value */ \
DWORD PrevWt; /* previous weight */ \
\
\
/* \
* Get the alphanumeric weight and the case weight of the \
* current code point. \
*/ \
AW = GET_ALPHA_NUMERIC(&wt); \
CW = GET_CASE(&wt); \
ExtraWt = (DWORD)0; \
\
/* \
* Special case Repeat and Cho-On. \
* AW = 0 => Repeat \
* AW = 1 => Cho-On \
* AW = 2+ => Kana \
*/ \
if (AW <= MAX_SPECIAL_AW) \
{ \
/* \
* If the script member of the previous character is \
* invalid, then give the special character an \
* invalid weight (highest possible weight) so that it \
* will sort AFTER everything else. \
*/ \
ct = 1; \
PrevWt = CMP_INVALID_FAREAST; \
while ((pCur - ct) >= pBegin) \
{ \
PrevWt = GET_DWORD_WEIGHT(pHashN, *(pCur - ct)); \
PrevWt &= mask; \
PrevSM = GET_SCRIPT_MEMBER(&PrevWt); \
if (PrevSM < FAREAST_SPECIAL) \
{ \
if (PrevSM == EXPANSION) \
{ \
PrevWt = CMP_INVALID_FAREAST; \
} \
else \
{ \
/* \
* UNSORTABLE or NONSPACE_MARK. \
* \
* Just ignore these, since we only care about the \
* previous UW value. \
*/ \
PrevWt = CMP_INVALID_FAREAST; \
ct++; \
continue; \
} \
} \
else if (PrevSM == FAREAST_SPECIAL) \
{ \
PrevAW = GET_ALPHA_NUMERIC(&PrevWt); \
if (PrevAW <= MAX_SPECIAL_AW) \
{ \
/* \
* Handle case where two special chars follow \
* each other. Keep going back in the string. \
*/ \
PrevWt = CMP_INVALID_FAREAST; \
ct++; \
continue; \
} \
\
UNICODE_WT(&PrevWt) = \
MAKE_UNICODE_WT(KANA, PrevAW, fModify); \
\
/* \
* Only build weights 4, 5, 6, and 7 if the \
* previous character is KANA. \
* \
* Always: \
* 4W = previous CW & ISOLATE_SMALL \
* 6W = previous CW & ISOLATE_KANA \
* \
*/ \
PrevCW = GET_CASE(&PrevWt); \
GET_WT_FOUR(&ExtraWt) = PrevCW & ISOLATE_SMALL; \
GET_WT_SIX(&ExtraWt) = PrevCW & ISOLATE_KANA; \
\
if (AW == AW_REPEAT) \
{ \
/* \
* Repeat: \
* UW = previous UW \
* 5W = WT_FIVE_REPEAT \
* 7W = previous CW & ISOLATE_WIDTH \
*/ \
uw = UNICODE_WT(&PrevWt); \
GET_WT_FIVE(&ExtraWt) = WT_FIVE_REPEAT; \
GET_WT_SEVEN(&ExtraWt) = PrevCW & ISOLATE_WIDTH; \
} \
else \
{ \
/* \
* Cho-On: \
* UW = previous UW & CHO_ON_UW_MASK \
* 5W = WT_FIVE_CHO_ON \
* 7W = current CW & ISOLATE_WIDTH \
*/ \
uw = UNICODE_WT(&PrevWt) & CHO_ON_UW_MASK; \
GET_WT_FIVE(&ExtraWt) = WT_FIVE_CHO_ON; \
GET_WT_SEVEN(&ExtraWt) = CW & ISOLATE_WIDTH; \
} \
} \
else \
{ \
uw = GET_UNICODE_MOD(&PrevWt, fModify); \
} \
\
break; \
} \
} \
else \
{ \
/* \
* Kana: \
* SM = KANA \
* AW = current AW \
* 4W = current CW & ISOLATE_SMALL \
* 5W = WT_FIVE_KANA \
* 6W = current CW & ISOLATE_KANA \
* 7W = current CW & ISOLATE_WIDTH \
*/ \
uw = MAKE_UNICODE_WT(KANA, AW, fModify); \
GET_WT_FOUR(&ExtraWt) = CW & ISOLATE_SMALL; \
GET_WT_FIVE(&ExtraWt) = WT_FIVE_KANA; \
GET_WT_SIX(&ExtraWt) = CW & ISOLATE_KANA; \
GET_WT_SEVEN(&ExtraWt) = CW & ISOLATE_WIDTH; \
} \
\
/* \
* Get the weight for the far east special case and store it in wt. \
*/ \
if ((AW > MAX_SPECIAL_AW) || (PrevWt != CMP_INVALID_FAREAST)) \
{ \
/* \
* Always: \
* DW = current DW \
* CW = minimum CW \
*/ \
UNICODE_WT(&wt) = uw; \
CASE_WT(&wt) = MIN_CW; \
} \
else \
{ \
uw = CMP_INVALID_UW; \
wt = CMP_INVALID_FAREAST; \
ExtraWt = 0; \
} \
}
//-------------------------------------------------------------------------//
// API ROUTINES //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
//  CompareStringW
//
//  Compares two wide character strings of the same locale according to the
//  supplied locale handle.
//
//    Locale     - locale id; validated with VALIDATE_LANGUAGE
//    dwCmpFlags - comparison flags (LOCALE_USE_CP_ACP is stripped)
//    lpString1  - first string (must not be NULL)
//    cchCount1  - length of string 1; any value <= -1 means the string
//                 is null-terminated
//    lpString2  - second string (must not be NULL)
//    cchCount2  - length of string 2; any value <= -1 means the string
//                 is null-terminated
//
//  Returns 0 on failure, with the last error set to
//  ERROR_INVALID_PARAMETER (bad locale or NULL string) or
//  ERROR_FILE_NOT_FOUND (sorting tables not loaded).  Otherwise returns
//  CSTR_LESS_THAN, CSTR_EQUAL or CSTR_GREATER_THAN.
//
//  Only the simple case is handled here: both strings null-terminated,
//  flags either 0 or NORM_IGNORECASE, no compression locale and not a
//  Korean locale.  Everything else is forwarded to LongCompareStringW.
//
//  05-31-91    JulieB    Created.
////////////////////////////////////////////////////////////////////////////
int WINAPI CompareStringW(
    LCID Locale,
    DWORD dwCmpFlags,
    LPCWSTR lpString1,
    int cchCount1,
    LPCWSTR lpString2,
    int cchCount2)
{
    register LPWSTR pString1;     // ptr to go thru string 1
    register LPWSTR pString2;     // ptr to go thru string 2
    PLOC_HASH pHashN;             // ptr to LOC hash node
    BOOL fIgnorePunct;            // flag to ignore punctuation (not symbol)
    BOOL fModify;                 // flag to use modified script member weights
    DWORD State;                  // state table
    DWORD Mask;                   // mask for weights
    DWORD Weight1;                // full weight of char - string 1
    DWORD Weight2;                // full weight of char - string 2
    int JamoFlag = FALSE;
    LPCWSTR pLastJamo = lpString1;
    int WhichDiacritic;           // DW => 1 = str1 smaller, 3 = str2 smaller
    int WhichCase;                // CW => 1 = str1 smaller, 3 = str2 smaller
    int WhichJamo;                // XW for Jamo
    int WhichPunct1;              // SW => 1 = str1 smaller, 3 = str2 smaller
    int WhichPunct2;              // SW => 1 = str1 smaller, 3 = str2 smaller
    LPWSTR pSave1;                // ptr to saved pString1
    LPWSTR pSave2;                // ptr to saved pString2
    int cExpChar1, cExpChar2;     // ct of expansions in tmp
    DWORD ExtraWt1, ExtraWt2;     // extra weight values (for far east)
    DWORD WhichExtra;             // XW => wts 4, 5, 6, 7 (for far east)

    //
    //  Temporary buffers for expansion characters.
    //
    //  These must live at function scope: pString1 / pString2 are pointed
    //  at them and are then dereferenced on LATER iterations of the main
    //  loop, after the block that fills them has been left.
    //
    WCHAR pTmpBuf1[MAX_TBL_EXPANSION];  // temp buffer for exp 1
    WCHAR pTmpBuf2[MAX_TBL_EXPANSION];  // temp buffer for exp 2


    //
    //  Invalid Parameter Check:
    //    - validate LCID
    //    - either string is null
    //
    VALIDATE_LANGUAGE(Locale, pHashN, 0, TRUE);
    if ((pHashN == NULL) ||
        (lpString1 == NULL) || (lpString2 == NULL))
    {
        SetLastError(ERROR_INVALID_PARAMETER);
        return (0);
    }

    //
    //  Make sure the appropriate sorting tables are available.  If not,
    //  return an error.
    //
    if ((pHashN->pSortkey == NULL) ||
        (pHashN->IfIdeographFailure == TRUE))
    {
        KdPrint(("NLSAPI: Appropriate Sorting Tables Not Loaded.\n"));
        SetLastError(ERROR_FILE_NOT_FOUND);
        return (0);
    }

    //
    //  Call longer compare string if any of the following is true:
    //     - compression locale
    //     - either count is not -1
    //     - dwCmpFlags is not 0 or ignore case   (see NOTE below)
    //     - locale is Korean - script member weight adjustment needed
    //
    //  NOTE:  If the value of NORM_IGNORECASE ever changes, this
    //         code should check for:
    //            ( (dwCmpFlags != 0)  &&  (dwCmpFlags != NORM_IGNORECASE) )
    //         Since NORM_IGNORECASE is equal to 1, we can optimize this
    //         by checking for > 1.
    //
    dwCmpFlags &= (~LOCALE_USE_CP_ACP);
    fModify = IS_KOREAN(Locale);
    if ( (pHashN->IfCompression) ||
         (cchCount1 > -1) || (cchCount2 > -1) ||
         (dwCmpFlags > NORM_IGNORECASE) ||
         (fModify == TRUE) )
    {
        return (LongCompareStringW( pHashN,
                                    dwCmpFlags,
                                    lpString1,
                                    ((cchCount1 <= -1) ? -2 : cchCount1),
                                    lpString2,
                                    ((cchCount2 <= -1) ? -2 : cchCount2),
                                    fModify ));
    }

    //
    //  Initialize string pointers.
    //
    pString1 = (LPWSTR)lpString1;
    pString2 = (LPWSTR)lpString2;

    //
    //  Do a wchar by wchar compare.
    //
    while (TRUE)
    {
        //
        //  See if characters are equal.
        //  If characters are equal, increment pointers and continue
        //  string compare.
        //
        //  NOTE: Loop is unrolled 8 times for performance.
        //
        if ((*pString1 != *pString2) || (*pString1 == 0))
        {
            break;
        }
        pString1++;
        pString2++;

        if ((*pString1 != *pString2) || (*pString1 == 0))
        {
            break;
        }
        pString1++;
        pString2++;

        if ((*pString1 != *pString2) || (*pString1 == 0))
        {
            break;
        }
        pString1++;
        pString2++;

        if ((*pString1 != *pString2) || (*pString1 == 0))
        {
            break;
        }
        pString1++;
        pString2++;

        if ((*pString1 != *pString2) || (*pString1 == 0))
        {
            break;
        }
        pString1++;
        pString2++;

        if ((*pString1 != *pString2) || (*pString1 == 0))
        {
            break;
        }
        pString1++;
        pString2++;

        if ((*pString1 != *pString2) || (*pString1 == 0))
        {
            break;
        }
        pString1++;
        pString2++;

        if ((*pString1 != *pString2) || (*pString1 == 0))
        {
            break;
        }
        pString1++;
        pString2++;
    }

    //
    //  If strings are both at null terminators, return equal.
    //
    if (*pString1 == *pString2)
    {
        return (CSTR_EQUAL);
    }

    //
    //  Initialize flags, pointers, and counters.
    //
    fIgnorePunct = FALSE;
    WhichDiacritic = 0;
    WhichCase = 0;
    WhichJamo = 0;
    WhichPunct1 = 0;
    WhichPunct2 = 0;
    pSave1 = NULL;
    pSave2 = NULL;
    ExtraWt1 = (DWORD)0;
    WhichExtra = (DWORD)0;

    //
    //  Switch on the different flag options.  This will speed up
    //  the comparisons of two strings that are different.
    //
    //  The only two possibilities in this optimized section are
    //  no flags and the ignore case flag.
    //
    if (dwCmpFlags == 0)
    {
        Mask = CMP_MASKOFF_NONE;
    }
    else
    {
        Mask = CMP_MASKOFF_CW;
    }

    State = (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;
    State |= (STATE_CW | STATE_JAMO_WEIGHT);

    //
    //  Compare each character's sortkey weight in the two strings.
    //
    while ((*pString1 != 0) && (*pString2 != 0))
    {
        Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1);
        Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
        Weight1 &= Mask;
        Weight2 &= Mask;

        if (Weight1 != Weight2)
        {
            BYTE sm1 = GET_SCRIPT_MEMBER(&Weight1);     // script member 1
            BYTE sm2 = GET_SCRIPT_MEMBER(&Weight2);     // script member 2
            WORD uw1 = GET_UNICODE_SM(&Weight1, sm1);   // unicode weight 1
            WORD uw2 = GET_UNICODE_SM(&Weight2, sm2);   // unicode weight 2
            BYTE dw1;                                   // diacritic weight 1
            BYTE dw2;                                   // diacritic weight 2
            BOOL fContinue;                             // flag to continue loop
            DWORD Wt;                                   // temp weight holder

            //
            //  If Unicode Weights are different and no special cases,
            //  then we're done.  Otherwise, we need to do extra checking.
            //
            //  Must check ENTIRE string for any possibility of Unicode Weight
            //  differences.  As soon as a Unicode Weight difference is found,
            //  then we're done.  If no UW difference is found, then the
            //  first Diacritic Weight difference is used.  If no DW difference
            //  is found, then use the first Case Difference.  If no CW
            //  difference is found, then use the first Extra Weight
            //  difference.  If no XW difference is found, then use the first
            //  Special Weight difference.
            //
            if ((uw1 != uw2) ||
                (sm1 == FAREAST_SPECIAL) ||
                (sm1 == EXTENSION_A))
            {
                //
                //  Initialize the continue flag.
                //
                fContinue = FALSE;

                //
                //  Check for Unsortable characters and skip them.
                //  This needs to be outside the switch statement.  If EITHER
                //  character is unsortable, must skip it and start over.
                //
                if (sm1 == UNSORTABLE)
                {
                    pString1++;
                    fContinue = TRUE;
                }
                if (sm2 == UNSORTABLE)
                {
                    pString2++;
                    fContinue = TRUE;
                }
                if (fContinue)
                {
                    continue;
                }

                //
                //  Switch on the script member of string 1 and take care
                //  of any special cases.
                //
                switch (sm1)
                {
                    case ( NONSPACE_MARK ) :
                    {
                        //
                        //  Nonspace only - look at diacritic weight only.
                        //
                        if ((WhichDiacritic == 0) ||
                            (State & STATE_REVERSE_DW))
                        {
                            WhichDiacritic = CSTR_GREATER_THAN;

                            //
                            //  Remove state from state machine.
                            //
                            REMOVE_STATE(STATE_DW);
                        }

                        //
                        //  Adjust pointer and set flags.
                        //
                        pString1++;
                        fContinue = TRUE;

                        break;
                    }
                    case ( PUNCTUATION ) :
                    {
                        //
                        //  If the ignore punctuation flag is set, then skip
                        //  over the punctuation.
                        //
                        if (fIgnorePunct)
                        {
                            pString1++;
                            fContinue = TRUE;
                        }
                        else if (sm2 != PUNCTUATION)
                        {
                            //
                            //  The character in the second string is
                            //  NOT punctuation.
                            //
                            if (WhichPunct2)
                            {
                                //
                                //  Set WP 2 to show that string 2 is smaller,
                                //  since a punctuation char had already been
                                //  found at an earlier position in string 2.
                                //
                                //  Set the Ignore Punctuation flag so we just
                                //  skip over any other punctuation chars in
                                //  the string.
                                //
                                WhichPunct2 = CSTR_GREATER_THAN;
                                fIgnorePunct = TRUE;
                            }
                            else
                            {
                                //
                                //  Set WP 1 to show that string 2 is smaller,
                                //  and that string 1 has had a punctuation
                                //  char - since no punctuation chars have
                                //  been found in string 2.
                                //
                                WhichPunct1 = CSTR_GREATER_THAN;
                            }

                            //
                            //  Advance pointer 1, and set flag to true.
                            //
                            pString1++;
                            fContinue = TRUE;
                        }

                        //
                        //  Do NOT want to advance the pointer in string 1 if
                        //  string 2 is also a punctuation char.  This will
                        //  be done later.
                        //
                        break;
                    }
                    case ( EXPANSION ) :
                    {
                        //
                        //  Save pointer in pString1 so that it can be
                        //  restored.
                        //
                        if (pSave1 == NULL)
                        {
                            pSave1 = pString1;
                        }
                        pString1 = pTmpBuf1;

                        //
                        //  Expand character into temporary buffer.
                        //
                        pTmpBuf1[0] = GET_EXPANSION_1(&Weight1);
                        pTmpBuf1[1] = GET_EXPANSION_2(&Weight1);

                        //
                        //  Set cExpChar1 to the number of expansion characters
                        //  stored.
                        //
                        cExpChar1 = MAX_TBL_EXPANSION;

                        fContinue = TRUE;
                        break;
                    }
                    case ( FAREAST_SPECIAL ) :
                    {
                        if (sm2 != EXPANSION)
                        {
                            //
                            //  Get the weight for the far east special case
                            //  and store it in Weight1.
                            //
                            GET_FAREAST_WEIGHT( Weight1,
                                                uw1,
                                                Mask,
                                                lpString1,
                                                pString1,
                                                ExtraWt1,
                                                FALSE );

                            if (sm2 != FAREAST_SPECIAL)
                            {
                                //
                                //  The character in the second string is
                                //  NOT a fareast special char.
                                //
                                //  Set each of weights 4, 5, 6, and 7 to show
                                //  that string 2 is smaller (if not already set).
                                //
                                if ((GET_WT_FOUR(&WhichExtra) == 0) &&
                                    (GET_WT_FOUR(&ExtraWt1) != 0))
                                {
                                    GET_WT_FOUR(&WhichExtra) = CSTR_GREATER_THAN;
                                }
                                if ((GET_WT_FIVE(&WhichExtra) == 0) &&
                                    (GET_WT_FIVE(&ExtraWt1) != 0))
                                {
                                    GET_WT_FIVE(&WhichExtra) = CSTR_GREATER_THAN;
                                }
                                if ((GET_WT_SIX(&WhichExtra) == 0) &&
                                    (GET_WT_SIX(&ExtraWt1) != 0))
                                {
                                    GET_WT_SIX(&WhichExtra) = CSTR_GREATER_THAN;
                                }
                                if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
                                    (GET_WT_SEVEN(&ExtraWt1) != 0))
                                {
                                    GET_WT_SEVEN(&WhichExtra) = CSTR_GREATER_THAN;
                                }
                            }
                        }
                        break;
                    }
                    case ( JAMO_SPECIAL ) :
                    {
                        //
                        //  NOTE(review): the same dummy counter is passed for
                        //  both strings here, while the string 2 case below
                        //  uses two separate dummies.  Left unchanged.
                        //
                        int ctr1;     // dummy variables for FindJamoDifference
                        LPWSTR pStr1 = pString1;
                        LPWSTR pStr2 = pString2;

                        //
                        //  Set the JamoFlag so we don't handle it again.
                        //
                        JamoFlag = TRUE;
                        fContinue = FindJamoDifference(
                            pHashN,
                            &pStr1, &ctr1, -2, &Weight1,
                            &pStr2, &ctr1, -2, &Weight2,
                            &pLastJamo,
                            &uw1, &uw2,
                            &State,
                            &WhichJamo,
                            fModify );
                        if (WhichJamo)
                        {
                            return (WhichJamo);
                        }
                        pString1 = pStr1;
                        pString2 = pStr2;
                        break;
                    }
                    case ( EXTENSION_A ) :
                    {
                        //
                        //  Compare the weights.
                        //
                        if (Weight1 == Weight2)
                        {
                            //
                            //  Adjust pointers and set flag.
                            //
                            pString1++;  pString2++;
                            fContinue = TRUE;
                        }
                        else
                        {
                            //
                            //  Get the actual UW to compare.
                            //
                            if (sm2 == EXTENSION_A)
                            {
                                //
                                //  Set the UW values to be the AW and DW since
                                //  both strings contain an extension A char.
                                //
                                uw1 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight1),
                                                       GET_DIACRITIC(&Weight1),
                                                       FALSE );
                                uw2 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight2),
                                                       GET_DIACRITIC(&Weight2),
                                                       FALSE );
                            }
                            else
                            {
                                //
                                //  Only string1 contains an extension A char,
                                //  so set the UW value to be the first UW
                                //  value for extension A (default values):
                                //    SM_EXT_A, AW_EXT_A
                                //
                                uw1 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
                            }
                        }
                        break;
                    }
                    case ( UNSORTABLE ) :
                    {
                        //
                        //  Fill out the case statement so the compiler
                        //  will use a jump table.
                        //
                        break;
                    }
                }

                //
                //  Switch on the script member of string 2 and take care
                //  of any special cases.
                //
                switch (sm2)
                {
                    case ( NONSPACE_MARK ) :
                    {
                        //
                        //  Nonspace only - look at diacritic weight only.
                        //
                        if ((WhichDiacritic == 0) ||
                            (State & STATE_REVERSE_DW))
                        {
                            WhichDiacritic = CSTR_LESS_THAN;

                            //
                            //  Remove state from state machine.
                            //
                            REMOVE_STATE(STATE_DW);
                        }

                        //
                        //  Adjust pointer and set flags.
                        //
                        pString2++;
                        fContinue = TRUE;

                        break;
                    }
                    case ( PUNCTUATION ) :
                    {
                        //
                        //  If the ignore punctuation flag is set, then skip
                        //  over the punctuation.
                        //
                        if (fIgnorePunct)
                        {
                            //
                            //  Pointer 2 will be advanced after if-else
                            //  statement.
                            //
                            ;
                        }
                        else if (sm1 != PUNCTUATION)
                        {
                            //
                            //  The character in the first string is
                            //  NOT punctuation.
                            //
                            if (WhichPunct1)
                            {
                                //
                                //  Set WP 1 to show that string 1 is smaller,
                                //  since a punctuation char had already
                                //  been found at an earlier position in
                                //  string 1.
                                //
                                //  Set the Ignore Punctuation flag so we just
                                //  skip over any other punctuation in the
                                //  string.
                                //
                                WhichPunct1 = CSTR_LESS_THAN;
                                fIgnorePunct = TRUE;
                            }
                            else
                            {
                                //
                                //  Set WP 2 to show that string 1 is smaller,
                                //  and that string 2 has had a punctuation
                                //  char - since no punctuation chars have
                                //  been found in string 1.
                                //
                                WhichPunct2 = CSTR_LESS_THAN;
                            }

                            //
                            //  Pointer 2 will be advanced after if-else
                            //  statement.
                            //
                        }
                        else
                        {
                            //
                            //  Both code points are punctuation.
                            //
                            //  See if either of the strings has encountered
                            //  punctuation chars previous to this.
                            //
                            if (WhichPunct1)
                            {
                                //
                                //  String 1 has had a punctuation char, so
                                //  it should be the smaller string (since
                                //  both have punctuation chars).
                                //
                                WhichPunct1 = CSTR_LESS_THAN;
                            }
                            else if (WhichPunct2)
                            {
                                //
                                //  String 2 has had a punctuation char, so
                                //  it should be the smaller string (since
                                //  both have punctuation chars).
                                //
                                WhichPunct2 = CSTR_GREATER_THAN;
                            }
                            else
                            {
                                //
                                //  Position is the same, so compare the
                                //  special weights.  Set WhichPunct1 to
                                //  the smaller special weight.
                                //
                                WhichPunct1 = (((GET_ALPHA_NUMERIC(&Weight1) <
                                                 GET_ALPHA_NUMERIC(&Weight2)))
                                                 ? CSTR_LESS_THAN
                                                 : CSTR_GREATER_THAN);
                            }

                            //
                            //  Set the Ignore Punctuation flag so we just
                            //  skip over any other punctuation in the string.
                            //
                            fIgnorePunct = TRUE;

                            //
                            //  Advance pointer 1.  Pointer 2 will be
                            //  advanced after if-else statement.
                            //
                            pString1++;
                        }

                        //
                        //  Advance pointer 2 and set flag to true.
                        //
                        pString2++;
                        fContinue = TRUE;

                        break;
                    }
                    case ( EXPANSION ) :
                    {
                        //
                        //  Save pointer in pString2 so that it can be
                        //  restored.
                        //
                        if (pSave2 == NULL)
                        {
                            pSave2 = pString2;
                        }
                        pString2 = pTmpBuf2;

                        //
                        //  Expand character into temporary buffer.
                        //
                        pTmpBuf2[0] = GET_EXPANSION_1(&Weight2);
                        pTmpBuf2[1] = GET_EXPANSION_2(&Weight2);

                        //
                        //  Set cExpChar2 to the number of expansion characters
                        //  stored.
                        //
                        cExpChar2 = MAX_TBL_EXPANSION;

                        fContinue = TRUE;
                        break;
                    }
                    case ( FAREAST_SPECIAL ) :
                    {
                        if (sm1 != EXPANSION)
                        {
                            //
                            //  Get the weight for the far east special case
                            //  and store it in Weight2.
                            //
                            GET_FAREAST_WEIGHT( Weight2,
                                                uw2,
                                                Mask,
                                                lpString2,
                                                pString2,
                                                ExtraWt2,
                                                FALSE );

                            if (sm1 != FAREAST_SPECIAL)
                            {
                                //
                                //  The character in the first string is
                                //  NOT a fareast special char.
                                //
                                //  Set each of weights 4, 5, 6, and 7 to show
                                //  that string 1 is smaller (if not already set).
                                //
                                if ((GET_WT_FOUR(&WhichExtra) == 0) &&
                                    (GET_WT_FOUR(&ExtraWt2) != 0))
                                {
                                    GET_WT_FOUR(&WhichExtra) = CSTR_LESS_THAN;
                                }
                                if ((GET_WT_FIVE(&WhichExtra) == 0) &&
                                    (GET_WT_FIVE(&ExtraWt2) != 0))
                                {
                                    GET_WT_FIVE(&WhichExtra) = CSTR_LESS_THAN;
                                }
                                if ((GET_WT_SIX(&WhichExtra) == 0) &&
                                    (GET_WT_SIX(&ExtraWt2) != 0))
                                {
                                    GET_WT_SIX(&WhichExtra) = CSTR_LESS_THAN;
                                }
                                if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
                                    (GET_WT_SEVEN(&ExtraWt2) != 0))
                                {
                                    GET_WT_SEVEN(&WhichExtra) = CSTR_LESS_THAN;
                                }
                            }
                            else
                            {
                                //
                                //  Characters in both strings are fareast
                                //  special chars.
                                //
                                //  Set each of weights 4, 5, 6, and 7
                                //  appropriately (if not already set).
                                //
                                if ( (GET_WT_FOUR(&WhichExtra) == 0) &&
                                     ( GET_WT_FOUR(&ExtraWt1) !=
                                       GET_WT_FOUR(&ExtraWt2) ) )
                                {
                                    GET_WT_FOUR(&WhichExtra) =
                                      ( GET_WT_FOUR(&ExtraWt1) <
                                        GET_WT_FOUR(&ExtraWt2) )
                                      ? CSTR_LESS_THAN
                                      : CSTR_GREATER_THAN;
                                }
                                if ( (GET_WT_FIVE(&WhichExtra) == 0) &&
                                     ( GET_WT_FIVE(&ExtraWt1) !=
                                       GET_WT_FIVE(&ExtraWt2) ) )
                                {
                                    GET_WT_FIVE(&WhichExtra) =
                                      ( GET_WT_FIVE(&ExtraWt1) <
                                        GET_WT_FIVE(&ExtraWt2) )
                                      ? CSTR_LESS_THAN
                                      : CSTR_GREATER_THAN;
                                }
                                if ( (GET_WT_SIX(&WhichExtra) == 0) &&
                                     ( GET_WT_SIX(&ExtraWt1) !=
                                       GET_WT_SIX(&ExtraWt2) ) )
                                {
                                    GET_WT_SIX(&WhichExtra) =
                                      ( GET_WT_SIX(&ExtraWt1) <
                                        GET_WT_SIX(&ExtraWt2) )
                                      ? CSTR_LESS_THAN
                                      : CSTR_GREATER_THAN;
                                }
                                if ( (GET_WT_SEVEN(&WhichExtra) == 0) &&
                                     ( GET_WT_SEVEN(&ExtraWt1) !=
                                       GET_WT_SEVEN(&ExtraWt2) ) )
                                {
                                    GET_WT_SEVEN(&WhichExtra) =
                                      ( GET_WT_SEVEN(&ExtraWt1) <
                                        GET_WT_SEVEN(&ExtraWt2) )
                                      ? CSTR_LESS_THAN
                                      : CSTR_GREATER_THAN;
                                }
                            }
                        }
                        break;
                    }
                    case ( JAMO_SPECIAL ) :
                    {
                        if (!JamoFlag)
                        {
                            int ctr1, ctr2;   // dummy variables for FindJamoDifference
                            LPWSTR pStr1 = pString1;
                            LPWSTR pStr2 = pString2;

                            //
                            //  Set the JamoFlag so we don't handle it again.
                            //
                            JamoFlag = TRUE;
                            fContinue = FindJamoDifference(
                                pHashN,
                                &pStr1, &ctr1, -2, &Weight1,
                                &pStr2, &ctr2, -2, &Weight2,
                                &pLastJamo,
                                &uw1, &uw2,
                                &State,
                                &WhichJamo,
                                fModify );
                            if (WhichJamo)
                            {
                                return (WhichJamo);
                            }
                            pString1 = pStr1;
                            pString2 = pStr2;
                        }
                        else
                        {
                            JamoFlag = FALSE;
                        }
                        break;
                    }
                    case ( EXTENSION_A ) :
                    {
                        //
                        //  If sm1 is an extension A character, then
                        //  both sm1 and sm2 have been handled.  We should
                        //  only get here when either sm1 is not an
                        //  extension A character or the two extension A
                        //  characters are different.
                        //
                        if (sm1 != EXTENSION_A)
                        {
                            //
                            //  Get the actual UW to compare.
                            //
                            //  Only string2 contains an extension A char,
                            //  so set the UW value to be the first UW
                            //  value for extension A (default values):
                            //    SM_EXT_A, AW_EXT_A
                            //
                            uw2 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
                        }

                        //
                        //  We should then fall through to the comparison
                        //  of the Unicode weights.
                        //
                        break;
                    }
                    case ( UNSORTABLE ) :
                    {
                        //
                        //  Fill out the case statement so the compiler
                        //  will use a jump table.
                        //
                        break;
                    }
                }

                //
                //  See if the comparison should start again.
                //
                if (fContinue)
                {
                    continue;
                }

                //
                //  We're not supposed to drop down into the state table if
                //  unicode weights are different, so stop comparison and
                //  return result of unicode weight comparison.
                //
                if (uw1 != uw2)
                {
                    return ((uw1 < uw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN);
                }
            }

            //
            //  For each state in the state table, do the appropriate
            //  comparisons.     (UW1 == UW2)
            //
            if (State & (STATE_DW | STATE_REVERSE_DW))
            {
                //
                //  Get the diacritic weights.
                //
                dw1 = GET_DIACRITIC(&Weight1);
                dw2 = GET_DIACRITIC(&Weight2);

                if (dw1 != dw2)
                {
                    //
                    //  Look ahead to see if diacritic follows a
                    //  minimum diacritic weight.  If so, get the
                    //  diacritic weight of the nonspace mark.
                    //
                    while (*(pString1 + 1) != 0)
                    {
                        Wt = GET_DWORD_WEIGHT(pHashN, *(pString1 + 1));
                        if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
                        {
                            dw1 += GET_DIACRITIC(&Wt);
                            pString1++;
                        }
                        else
                        {
                            break;
                        }
                    }

                    while (*(pString2 + 1) != 0)
                    {
                        Wt = GET_DWORD_WEIGHT(pHashN, *(pString2 + 1));
                        if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
                        {
                            dw2 += GET_DIACRITIC(&Wt);
                            pString2++;
                        }
                        else
                        {
                            break;
                        }
                    }

                    //
                    //  Save which string has the smaller diacritic
                    //  weight if the diacritic weights are still
                    //  different.
                    //
                    if (dw1 != dw2)
                    {
                        WhichDiacritic = (dw1 < dw2)
                                           ? CSTR_LESS_THAN
                                           : CSTR_GREATER_THAN;

                        //
                        //  Remove state from state machine.
                        //
                        REMOVE_STATE(STATE_DW);
                    }
                }
            }
            if (State & STATE_CW)
            {
                //
                //  Get the case weights.
                //
                if (GET_CASE(&Weight1) != GET_CASE(&Weight2))
                {
                    //
                    //  Save which string has the smaller case weight.
                    //
                    WhichCase = (GET_CASE(&Weight1) < GET_CASE(&Weight2))
                                  ? CSTR_LESS_THAN
                                  : CSTR_GREATER_THAN;

                    //
                    //  Remove state from state machine.
                    //
                    REMOVE_STATE(STATE_CW);
                }
            }
        }

        //
        //  Fixup the pointers.
        //
        POINTER_FIXUP();
    }

    //
    //  If the end of BOTH strings has been reached, then the unicode
    //  weights match exactly.  Check the diacritic, case and special
    //  weights.  If all are zero, then return success.  Otherwise,
    //  return the result of the weight difference.
    //
    //  NOTE:  The following checks MUST REMAIN IN THIS ORDER:
    //            Diacritic, Case, Extra, Punctuation.
    //
    if (*pString1 == 0)
    {
        if (*pString2 == 0)
        {
            if (WhichDiacritic)
            {
                return (WhichDiacritic);
            }
            if (WhichCase)
            {
                return (WhichCase);
            }
            if (WhichExtra)
            {
                if (GET_WT_FOUR(&WhichExtra))
                {
                    return (GET_WT_FOUR(&WhichExtra));
                }
                if (GET_WT_FIVE(&WhichExtra))
                {
                    return (GET_WT_FIVE(&WhichExtra));
                }
                if (GET_WT_SIX(&WhichExtra))
                {
                    return (GET_WT_SIX(&WhichExtra));
                }
                if (GET_WT_SEVEN(&WhichExtra))
                {
                    return (GET_WT_SEVEN(&WhichExtra));
                }
            }
            if (WhichPunct1)
            {
                return (WhichPunct1);
            }
            if (WhichPunct2)
            {
                return (WhichPunct2);
            }

            return (CSTR_EQUAL);
        }
        else
        {
            //
            //  String 2 is longer.
            //
            pString1 = pString2;
        }
    }

    //
    //  Scan to the end of the longer string.
    //
    QUICK_SCAN_LONGER_STRING( pString1,
                              ((*pString2 == 0)
                                ? CSTR_GREATER_THAN
                                : CSTR_LESS_THAN) );
}
////////////////////////////////////////////////////////////////////////////
//
//  GetStringTypeExW
//
//  Returns character type information about a particular Unicode string.
//
//  The locale is only validated; the actual work is done by
//  GetStringTypeW, which receives the remaining four parameters
//  unchanged.  If the locale does not validate, the last error is set to
//  ERROR_INVALID_PARAMETER and 0 is returned.
//
//  01-18-94    JulieB    Created.
////////////////////////////////////////////////////////////////////////////
BOOL WINAPI GetStringTypeExW(
    LCID Locale,
    DWORD dwInfoType,
    LPCWSTR lpSrcStr,
    int cchSrc,
    LPWORD lpCharType)
{
    PLOC_HASH pHashN;             // ptr to LOC hash node
    BOOL fResult = 0;             // value handed back to the caller

    //
    //  The locale id must map to a hash node.
    //
    VALIDATE_LOCALE(Locale, pHashN, FALSE);

    if (pHashN != NULL)
    {
        //
        //  Valid locale - let GetStringTypeW produce the result.
        //
        fResult = GetStringTypeW( dwInfoType,
                                  lpSrcStr,
                                  cchSrc,
                                  lpCharType );
    }
    else
    {
        //
        //  Invalid locale.
        //
        SetLastError(ERROR_INVALID_PARAMETER);
    }

    return (fResult);
}
////////////////////////////////////////////////////////////////////////////
//
// GetStringTypeW
//
// Fills lpCharType with one WORD of character type information for every
// character of lpSrcStr.  dwInfoType selects which of the three ctype
// tables (CT_CTYPE1, CT_CTYPE2 or CT_CTYPE3) the information is read from.
//
// A negative cchSrc means the source string is null terminated; the
// terminator is then included in the characters that are processed.
//
// Returns TRUE on success.  Returns FALSE and sets the last error to:
//   ERROR_INVALID_PARAMETER - NULL source or destination, zero count, or
//                             source and destination are the same buffer
//   ERROR_FILE_NOT_FOUND    - GetCTypeFileInfo reported a failure
//   ERROR_INVALID_FLAGS     - dwInfoType is not one of the CT_CTYPE values
//
// NOTE: The number of parameters is different from GetStringTypeA.
// The 16-bit OLE product shipped GetStringTypeA with the wrong
// parameters (ported from Chicago) and now we must support it.
//
// Use GetStringTypeEx to get the same set of parameters between
// the A and W version.
//
// 05-31-91 JulieB Created.
////////////////////////////////////////////////////////////////////////////
BOOL WINAPI GetStringTypeW(
    DWORD dwInfoType,
    LPCWSTR lpSrcStr,
    int cchSrc,
    LPWORD lpCharType)
{
    int Pos;                      // index of the character being classified

    //
    // Reject unusable arguments up front.  The info type is validated
    // last, when it is actually dispatched on.
    //
    if ( (lpSrcStr == NULL) || (lpCharType == NULL) ||
         (cchSrc == 0) || (lpSrcStr == lpCharType) )
    {
        SetLastError(ERROR_INVALID_PARAMETER);
        return (FALSE);
    }

    //
    // A negative count means "null terminated": measure the string and
    // count the terminator as well, so the count is always at least 1.
    //
    if (cchSrc < 0)
    {
        cchSrc = NlsStrLenW(lpSrcStr) + 1;
    }

    //
    // The ctype table has to be available before any lookup.
    //
    if (GetCTypeFileInfo())
    {
        SetLastError(ERROR_FILE_NOT_FOUND);
        return (FALSE);
    }

    //
    // Copy out the requested kind of character type information.
    //
    Pos = 0;
    if (dwInfoType == CT_CTYPE1)
    {
        while (Pos < cchSrc)
        {
            lpCharType[Pos] = GET_CTYPE(lpSrcStr[Pos], CType1);
            Pos++;
        }
    }
    else if (dwInfoType == CT_CTYPE2)
    {
        while (Pos < cchSrc)
        {
            lpCharType[Pos] = GET_CTYPE(lpSrcStr[Pos], CType2);
            Pos++;
        }
    }
    else if (dwInfoType == CT_CTYPE3)
    {
        while (Pos < cchSrc)
        {
            lpCharType[Pos] = GET_CTYPE(lpSrcStr[Pos], CType3);
            Pos++;
        }
    }
    else
    {
        //
        // Unknown info type.
        //
        SetLastError(ERROR_INVALID_FLAGS);
        return (FALSE);
    }

    return (TRUE);
}
//-------------------------------------------------------------------------//
// INTERNAL ROUTINES //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
// LongCompareStringW
//
// Compares two wide character strings of the same locale according to the
// supplied locale handle.
//
// Both strings are walked in parallel and the sortkey weight of each
// character is compared.  Once the special cases have been handled, a
// difference in unicode weight ends the comparison immediately.  The
// first diacritic, case, extra (far east) and punctuation differences
// are only recorded while scanning; they are consulted, in that order,
// when the end of both strings has been reached.
//
// Parameters:
//   pHashN     - locale hash node; supplies the compression and reverse
//                diacritic settings, the compression tables, and is
//                passed to the weight lookup macros
//   dwCmpFlags - comparison flags (NORM_* and SORT_STRINGSORT are tested)
//   lpString1  - first string
//   cchCount1  - count for string 1; also the initial value of the
//                string 1 loop counter
//   lpString2  - second string
//   cchCount2  - count for string 2; also the initial value of the
//                string 2 loop counter
//   fModify    - passed through to the weight macros and to
//                FindJamoDifference
//
// Returns 0 with the last error set to ERROR_INVALID_FLAGS if dwCmpFlags
// contains a CS_INVALID_FLAG bit.  Otherwise returns CSTR_EQUAL,
// CSTR_LESS_THAN, CSTR_GREATER_THAN, or the non-zero WhichJamo value
// produced by FindJamoDifference.  When one string is longer, the final
// result comes from the SCAN_LONGER_STRING macro, which is defined
// outside this routine.
//
// 05-31-91 JulieB Created.
////////////////////////////////////////////////////////////////////////////
int LongCompareStringW(
PLOC_HASH pHashN,
DWORD dwCmpFlags,
LPCWSTR lpString1,
int cchCount1,
LPCWSTR lpString2,
int cchCount2,
BOOL fModify)
{
int ctr1 = cchCount1; // loop counter for string 1
int ctr2 = cchCount2; // loop counter for string 2
register LPWSTR pString1; // ptr to go thru string 1
register LPWSTR pString2; // ptr to go thru string 2
BOOL IfCompress; // if compression in locale
BOOL IfDblCompress1; // if double compression in string 1
BOOL IfDblCompress2; // if double compression in string 2
BOOL fEnd1; // CSTR_* value handed to SCAN_LONGER_STRING for the longer string
BOOL fIgnorePunct; // flag to ignore punctuation (not symbol)
BOOL fIgnoreDiacritic; // flag to ignore diacritics
BOOL fIgnoreSymbol; // flag to ignore symbols
BOOL fStringSort; // flag to use string sort
DWORD State; // state table
DWORD Mask; // mask for weights
DWORD Weight1; // full weight of char - string 1
DWORD Weight2; // full weight of char - string 2
int JamoFlag = FALSE; // set once the string 1 switch has handled a jamo special
LPCWSTR pLastJamo = lpString1; // passed by address to FindJamoDifference
int WhichDiacritic; // DW => 1 = str1 smaller, 3 = str2 smaller
int WhichCase; // CW => 1 = str1 smaller, 3 = str2 smaller
int WhichJamo; // XW for Jamo
int WhichPunct1; // SW => 1 = str1 smaller, 3 = str2 smaller
int WhichPunct2; // SW => 1 = str1 smaller, 3 = str2 smaller
LPWSTR pSave1; // ptr to saved pString1
LPWSTR pSave2; // ptr to saved pString2
int cExpChar1, cExpChar2; // ct of expansions in tmp
DWORD ExtraWt1, ExtraWt2; // extra weight values (for far east)
DWORD WhichExtra; // XW => wts 4, 5, 6, 7 (for far east)
//
// Initialize string pointers.
//
pString1 = (LPWSTR)lpString1;
pString2 = (LPWSTR)lpString2;
//
// Invalid Flags Check:
// - invalid flags
//
if (dwCmpFlags & CS_INVALID_FLAG)
{
SetLastError(ERROR_INVALID_FLAGS);
return (0);
}
//
// See if we should stop on the null terminator regardless of the
// count values. The original count values are stored in ctr1 and ctr2
// above, so it's ok to set these here.
//
// NOTE: -2 is the value NOT_END_STRING tests for in order to stop at
// a null character.
//
if (dwCmpFlags & NORM_STOP_ON_NULL)
{
cchCount1 = cchCount2 = -2;
}
//
// Check if compression in the given locale. If not, then
// try a wchar by wchar compare. If strings are equal, this
// will be quick.
//
if ((IfCompress = pHashN->IfCompression) == FALSE)
{
//
// Compare each wide character in the two strings.
//
while ( NOT_END_STRING(ctr1, pString1, cchCount1) &&
NOT_END_STRING(ctr2, pString2, cchCount2) )
{
//
// See if characters are equal.
//
if (*pString1 == *pString2)
{
//
// Characters are equal, so increment pointers,
// decrement counters, and continue string compare.
//
pString1++;
pString2++;
ctr1--;
ctr2--;
}
else
{
//
// Difference was found. Fall into the sortkey
// check below.
//
break;
}
}
//
// If the end of BOTH strings has been reached, then the strings
// match exactly. Return success.
//
if ( AT_STRING_END(ctr1, pString1, cchCount1) &&
AT_STRING_END(ctr2, pString2, cchCount2) )
{
return (CSTR_EQUAL);
}
}
//
// Initialize flags, pointers, and counters.
//
// NOTE: The sortkey compare below continues from wherever the quick
// compare above left the pointers and counters.
//
fIgnorePunct = dwCmpFlags & NORM_IGNORESYMBOLS;
fIgnoreDiacritic = dwCmpFlags & NORM_IGNORENONSPACE;
fIgnoreSymbol = fIgnorePunct;
fStringSort = dwCmpFlags & SORT_STRINGSORT;
WhichDiacritic = 0;
WhichCase = 0;
WhichJamo = 0;
WhichPunct1 = 0;
WhichPunct2 = 0;
pSave1 = NULL;
pSave2 = NULL;
ExtraWt1 = (DWORD)0;
WhichExtra = (DWORD)0;
//
// Set the weights to be invalid. This flags whether or not to
// recompute the weights next time through the loop. It also flags
// whether or not to start over (continue) in the loop.
//
Weight1 = CMP_INVALID_WEIGHT;
Weight2 = CMP_INVALID_WEIGHT;
//
// Switch on the different flag options. This will speed up
// the comparisons of two strings that are different.
//
// All four combinations of the two flags have a case below, so Mask
// is always assigned. The diacritic states are only entered when
// NORM_IGNORENONSPACE is not set.
//
State = STATE_CW | STATE_JAMO_WEIGHT;
switch (dwCmpFlags & (NORM_IGNORECASE | NORM_IGNORENONSPACE))
{
case ( 0 ) :
{
Mask = CMP_MASKOFF_NONE;
State |= (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;
break;
}
case ( NORM_IGNORECASE ) :
{
Mask = CMP_MASKOFF_CW;
State |= (pHashN->IfReverseDW) ? STATE_REVERSE_DW : STATE_DW;
break;
}
case ( NORM_IGNORENONSPACE ) :
{
Mask = CMP_MASKOFF_DW;
break;
}
case ( NORM_IGNORECASE | NORM_IGNORENONSPACE ) :
{
Mask = CMP_MASKOFF_DW_CW;
break;
}
}
//
// Narrow the mask further for the kana type and width flags. The
// case weight state is dropped only when width is ignored together
// with case.
//
switch (dwCmpFlags & (NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH))
{
case ( 0 ) :
{
break;
}
case ( NORM_IGNOREKANATYPE ) :
{
Mask &= CMP_MASKOFF_KANA;
break;
}
case ( NORM_IGNOREWIDTH ) :
{
Mask &= CMP_MASKOFF_WIDTH;
if (dwCmpFlags & NORM_IGNORECASE)
{
REMOVE_STATE(STATE_CW);
}
break;
}
case ( NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ) :
{
Mask &= CMP_MASKOFF_KANA_WIDTH;
if (dwCmpFlags & NORM_IGNORECASE)
{
REMOVE_STATE(STATE_CW);
}
break;
}
}
//
// Compare each character's sortkey weight in the two strings.
//
while ( NOT_END_STRING(ctr1, pString1, cchCount1) &&
NOT_END_STRING(ctr2, pString2, cchCount2) )
{
//
// Only look up a weight that was invalidated on the previous pass;
// a weight that is still valid is carried over unchanged.
//
if (Weight1 == CMP_INVALID_WEIGHT)
{
Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1);
Weight1 &= Mask;
}
if (Weight2 == CMP_INVALID_WEIGHT)
{
Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
Weight2 &= Mask;
}
//
// If compression locale, then need to check for compression
// characters even if the weights are equal. If it's not a
// compression locale, then we don't need to check anything
// if the weights are equal.
//
if ( (IfCompress) &&
(GET_COMPRESSION(&Weight1) || GET_COMPRESSION(&Weight2)) )
{
int ctr; // loop counter
PCOMPRESS_3 pComp3; // ptr to compress 3 table
PCOMPRESS_2 pComp2; // ptr to compress 2 table
int If1; // if compression found in string 1
int If2; // if compression found in string 2
int CompVal; // compression value
int IfEnd1; // if exists 1 more char in string 1
int IfEnd2; // if exists 1 more char in string 2
//
// Check for compression in the weights.
//
If1 = GET_COMPRESSION(&Weight1);
If2 = GET_COMPRESSION(&Weight2);
CompVal = ((If1 > If2) ? If1 : If2);
IfEnd1 = AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1);
IfEnd2 = AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2);
if (pHashN->IfDblCompression == FALSE)
{
//
// NO double compression, so don't check for it.
//
switch (CompVal)
{
//
// Check for 3 characters compressing to 1.
//
case ( COMPRESS_3_MASK ) :
{
//
// Check character in string 1 and string 2.
//
if ( ((If1) && (!IfEnd1) &&
!AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1)) ||
((If2) && (!IfEnd2) &&
!AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2)) )
{
ctr = pHashN->pCompHdr->Num3;
pComp3 = pHashN->pCompress3;
for (; ctr > 0; ctr--, pComp3++)
{
//
// Check character in string 1.
//
if ( (If1) && (!IfEnd1) &&
!AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1) &&
(pComp3->UCP1 == *pString1) &&
(pComp3->UCP2 == *(pString1 + 1)) &&
(pComp3->UCP3 == *(pString1 + 2)) )
{
//
// Found compression for string 1.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight1 = MAKE_SORTKEY_DWORD(pComp3->Weights);
Weight1 &= Mask;
pString1 += 2;
ctr1 -= 2;
//
// Set boolean for string 1 - search is
// complete.
//
If1 = 0;
//
// Break out of loop if both searches are
// done.
//
if (If2 == 0)
{
break;
}
}
//
// Check character in string 2.
//
if ( (If2) && (!IfEnd2) &&
!AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2) &&
(pComp3->UCP1 == *pString2) &&
(pComp3->UCP2 == *(pString2 + 1)) &&
(pComp3->UCP3 == *(pString2 + 2)) )
{
//
// Found compression for string 2.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight2 = MAKE_SORTKEY_DWORD(pComp3->Weights);
Weight2 &= Mask;
pString2 += 2;
ctr2 -= 2;
//
// Set boolean for string 2 - search is
// complete.
//
If2 = 0;
//
// Break out of loop if both searches are
// done.
//
if (If1 == 0)
{
break;
}
}
}
//
// ctr is still positive only if the loop was left
// early, i.e. both searches are done. In that case
// skip the 2 character check below.
//
if (ctr > 0)
{
break;
}
}
//
// Fall through if not found.
//
}
//
// Check for 2 characters compressing to 1.
//
case ( COMPRESS_2_MASK ) :
{
//
// Check character in string 1 and string 2.
//
if ( ((If1) && (!IfEnd1)) ||
((If2) && (!IfEnd2)) )
{
ctr = pHashN->pCompHdr->Num2;
pComp2 = pHashN->pCompress2;
for (; ((ctr > 0) && (If1 || If2)); ctr--, pComp2++)
{
//
// Check character in string 1.
//
if ( (If1) &&
(!IfEnd1) &&
(pComp2->UCP1 == *pString1) &&
(pComp2->UCP2 == *(pString1 + 1)) )
{
//
// Found compression for string 1.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight1 = MAKE_SORTKEY_DWORD(pComp2->Weights);
Weight1 &= Mask;
pString1++;
ctr1--;
//
// Set boolean for string 1 - search is
// complete.
//
If1 = 0;
//
// Break out of loop if both searches are
// done.
//
if (If2 == 0)
{
break;
}
}
//
// Check character in string 2.
//
if ( (If2) &&
(!IfEnd2) &&
(pComp2->UCP1 == *pString2) &&
(pComp2->UCP2 == *(pString2 + 1)) )
{
//
// Found compression for string 2.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight2 = MAKE_SORTKEY_DWORD(pComp2->Weights);
Weight2 &= Mask;
pString2++;
ctr2--;
//
// Set boolean for string 2 - search is
// complete.
//
If2 = 0;
//
// Break out of loop if both searches are
// done.
//
if (If1 == 0)
{
break;
}
}
}
if (ctr > 0)
{
break;
}
}
}
}
}
else if (!IfEnd1 && !IfEnd2)
{
//
// Double Compression exists, so must check for it.
//
// A double compression is assumed when a character and the
// one following it have the same weight once the case weight
// is masked off.
//
if (IfDblCompress1 =
((GET_DWORD_WEIGHT(pHashN, *pString1) & CMP_MASKOFF_CW) ==
(GET_DWORD_WEIGHT(pHashN, *(pString1 + 1)) & CMP_MASKOFF_CW)))
{
//
// Advance past the first code point to get to the
// compression character.
//
pString1++;
ctr1--;
IfEnd1 = AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1);
}
if (IfDblCompress2 =
((GET_DWORD_WEIGHT(pHashN, *pString2) & CMP_MASKOFF_CW) ==
(GET_DWORD_WEIGHT(pHashN, *(pString2 + 1)) & CMP_MASKOFF_CW)))
{
//
// Advance past the first code point to get to the
// compression character.
//
pString2++;
ctr2--;
IfEnd2 = AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2);
}
switch (CompVal)
{
//
// Check for 3 characters compressing to 1.
//
case ( COMPRESS_3_MASK ) :
{
//
// Check character in string 1.
//
if ( (If1) && (!IfEnd1) &&
!AT_STRING_END(ctr1 - 2, pString1 + 2, cchCount1) )
{
ctr = pHashN->pCompHdr->Num3;
pComp3 = pHashN->pCompress3;
for (; ctr > 0; ctr--, pComp3++)
{
//
// Check character in string 1.
//
if ( (pComp3->UCP1 == *pString1) &&
(pComp3->UCP2 == *(pString1 + 1)) &&
(pComp3->UCP3 == *(pString1 + 2)) )
{
//
// Found compression for string 1.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight1 = MAKE_SORTKEY_DWORD(pComp3->Weights);
Weight1 &= Mask;
if (!IfDblCompress1)
{
pString1 += 2;
ctr1 -= 2;
}
//
// Set boolean for string 1 - search is
// complete.
//
If1 = 0;
break;
}
}
}
//
// Check character in string 2.
//
if ( (If2) && (!IfEnd2) &&
!AT_STRING_END(ctr2 - 2, pString2 + 2, cchCount2) )
{
ctr = pHashN->pCompHdr->Num3;
pComp3 = pHashN->pCompress3;
for (; ctr > 0; ctr--, pComp3++)
{
//
// Check character in string 2.
//
if ( (pComp3->UCP1 == *pString2) &&
(pComp3->UCP2 == *(pString2 + 1)) &&
(pComp3->UCP3 == *(pString2 + 2)) )
{
//
// Found compression for string 2.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight2 = MAKE_SORTKEY_DWORD(pComp3->Weights);
Weight2 &= Mask;
if (!IfDblCompress2)
{
pString2 += 2;
ctr2 -= 2;
}
//
// Set boolean for string 2 - search is
// complete.
//
If2 = 0;
break;
}
}
}
//
// Fall through if not found.
//
if ((If1 == 0) && (If2 == 0))
{
break;
}
}
//
// Check for 2 characters compressing to 1.
//
case ( COMPRESS_2_MASK ) :
{
//
// Check character in string 1.
//
if ((If1) && (!IfEnd1))
{
ctr = pHashN->pCompHdr->Num2;
pComp2 = pHashN->pCompress2;
for (; ctr > 0; ctr--, pComp2++)
{
//
// Check character in string 1.
//
if ((pComp2->UCP1 == *pString1) &&
(pComp2->UCP2 == *(pString1 + 1)))
{
//
// Found compression for string 1.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight1 = MAKE_SORTKEY_DWORD(pComp2->Weights);
Weight1 &= Mask;
if (!IfDblCompress1)
{
pString1++;
ctr1--;
}
//
// Set boolean for string 1 - search is
// complete.
//
If1 = 0;
break;
}
}
}
//
// Check character in string 2.
//
if ((If2) && (!IfEnd2))
{
ctr = pHashN->pCompHdr->Num2;
pComp2 = pHashN->pCompress2;
for (; ctr > 0; ctr--, pComp2++)
{
//
// Check character in string 2.
//
if ((pComp2->UCP1 == *pString2) &&
(pComp2->UCP2 == *(pString2 + 1)))
{
//
// Found compression for string 2.
// Get new weight and mask it.
// Increment pointer and decrement counter.
//
Weight2 = MAKE_SORTKEY_DWORD(pComp2->Weights);
Weight2 &= Mask;
if (!IfDblCompress2)
{
pString2++;
ctr2--;
}
//
// Set boolean for string 2 - search is
// complete.
//
If2 = 0;
break;
}
}
}
}
}
//
// Reset the pointer back to the beginning of the double
// compression. Pointer fixup at the end will advance
// them correctly.
//
// If double compression, we advanced the pointer at
// the beginning of the switch statement. If double
// compression character was actually found, the pointer
// was NOT advanced. We now want to decrement the pointer
// to put it back to where it was.
//
// The next time through, the pointer will be pointing to
// the regular compression part of the string.
//
if (IfDblCompress1)
{
pString1--;
ctr1++;
}
if (IfDblCompress2)
{
pString2--;
ctr2++;
}
}
}
//
// Check the weights again.
//
// Extension A characters in string 1 always take this path, even
// when the masked weights are equal.
//
if ((Weight1 != Weight2) ||
(GET_SCRIPT_MEMBER(&Weight1) == EXTENSION_A))
{
//
// Weights are still not equal, even after compression
// check, so compare the different weights.
//
BYTE sm1 = GET_SCRIPT_MEMBER(&Weight1); // script member 1
BYTE sm2 = GET_SCRIPT_MEMBER(&Weight2); // script member 2
WORD uw1 = GET_UNICODE_SM_MOD(&Weight1, sm1, fModify); // unicode weight 1
WORD uw2 = GET_UNICODE_SM_MOD(&Weight2, sm2, fModify); // unicode weight 2
BYTE dw1; // diacritic weight 1
BYTE dw2; // diacritic weight 2
DWORD Wt; // temp weight holder
WCHAR pTmpBuf1[MAX_TBL_EXPANSION]; // temp buffer for exp 1
WCHAR pTmpBuf2[MAX_TBL_EXPANSION]; // temp buffer for exp 2
//
// If Unicode Weights are different and no special cases,
// then we're done. Otherwise, we need to do extra checking.
//
// Must check ENTIRE string for any possibility of Unicode Weight
// differences. As soon as a Unicode Weight difference is found,
// then we're done. If no UW difference is found, then the
// first Diacritic Weight difference is used. If no DW difference
// is found, then use the first Case Difference. If no CW
// difference is found, then use the first Extra Weight
// difference. If no XW difference is found, then use the first
// Special Weight difference.
//
if ((uw1 != uw2) ||
((sm1 <= SYMBOL_5) && (sm1 >= FAREAST_SPECIAL)))
{
//
// Check for Unsortable characters and skip them.
// This needs to be outside the switch statement. If EITHER
// character is unsortable, must skip it and start over.
//
if (sm1 == UNSORTABLE)
{
pString1++;
ctr1--;
Weight1 = CMP_INVALID_WEIGHT;
}
if (sm2 == UNSORTABLE)
{
pString2++;
ctr2--;
Weight2 = CMP_INVALID_WEIGHT;
}
//
// Check for Ignore Nonspace and Ignore Symbol. If
// Ignore Nonspace is set and either character is a
// nonspace mark only, then we need to advance the
// pointer to skip over the character and continue.
// If Ignore Symbol is set and either character is a
// punctuation char, then we need to advance the
// pointer to skip over the character and continue.
//
// This step is necessary so that a string with a
// nonspace mark and a punctuation char following one
// another are properly ignored when one or both of
// the ignore flags is set.
//
if (fIgnoreDiacritic)
{
if (sm1 == NONSPACE_MARK)
{
pString1++;
ctr1--;
Weight1 = CMP_INVALID_WEIGHT;
}
if (sm2 == NONSPACE_MARK)
{
pString2++;
ctr2--;
Weight2 = CMP_INVALID_WEIGHT;
}
}
if (fIgnoreSymbol)
{
if (sm1 == PUNCTUATION)
{
pString1++;
ctr1--;
Weight1 = CMP_INVALID_WEIGHT;
}
if (sm2 == PUNCTUATION)
{
pString2++;
ctr2--;
Weight2 = CMP_INVALID_WEIGHT;
}
}
//
// If either character was skipped above, start the loop over
// so that the weight of the next character gets looked up.
//
if ((Weight1 == CMP_INVALID_WEIGHT) || (Weight2 == CMP_INVALID_WEIGHT))
{
continue;
}
//
// Switch on the script member of string 1 and take care
// of any special cases.
//
switch (sm1)
{
case ( NONSPACE_MARK ) :
{
//
// Nonspace only - look at diacritic weight only.
//
if (!fIgnoreDiacritic)
{
if ((WhichDiacritic == 0) ||
(State & STATE_REVERSE_DW))
{
WhichDiacritic = CSTR_GREATER_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_DW);
}
}
//
// Adjust pointer and counter and set flags.
//
pString1++;
ctr1--;
Weight1 = CMP_INVALID_WEIGHT;
break;
}
case ( SYMBOL_1 ) :
case ( SYMBOL_2 ) :
case ( SYMBOL_3 ) :
case ( SYMBOL_4 ) :
case ( SYMBOL_5 ) :
{
//
// If the ignore symbol flag is set, then skip over
// the symbol.
//
if (fIgnoreSymbol)
{
pString1++;
ctr1--;
Weight1 = CMP_INVALID_WEIGHT;
}
break;
}
case ( PUNCTUATION ) :
{
//
// If the ignore punctuation flag is set, then skip
// over the punctuation char.
//
if (fIgnorePunct)
{
pString1++;
ctr1--;
Weight1 = CMP_INVALID_WEIGHT;
}
else if (!fStringSort)
{
//
// Use WORD sort method.
//
if (sm2 != PUNCTUATION)
{
//
// The character in the second string is
// NOT punctuation.
//
if (WhichPunct2)
{
//
// Set WP 2 to show that string 2 is
// smaller, since a punctuation char had
// already been found at an earlier
// position in string 2.
//
// Set the Ignore Punctuation flag so we
// just skip over any other punctuation
// chars in the string.
//
WhichPunct2 = CSTR_GREATER_THAN;
fIgnorePunct = TRUE;
}
else
{
//
// Set WP 1 to show that string 2 is
// smaller, and that string 1 has had
// a punctuation char - since no
// punctuation chars have been found
// in string 2.
//
WhichPunct1 = CSTR_GREATER_THAN;
}
//
// Advance pointer 1 and decrement counter 1.
//
pString1++;
ctr1--;
Weight1 = CMP_INVALID_WEIGHT;
}
//
// Do NOT want to advance the pointer in string 1
// if string 2 is also a punctuation char. This
// will be done later.
//
}
break;
}
case ( EXPANSION ) :
{
//
// Save pointer in pString1 so that it can be
// restored.
//
if (pSave1 == NULL)
{
pSave1 = pString1;
}
pString1 = pTmpBuf1;
//
// Add one to counter so that subtraction doesn't end
// comparison prematurely.
//
ctr1++;
//
// Expand character into temporary buffer.
//
pTmpBuf1[0] = GET_EXPANSION_1(&Weight1);
pTmpBuf1[1] = GET_EXPANSION_2(&Weight1);
//
// Set cExpChar1 to the number of expansion characters
// stored.
//
cExpChar1 = MAX_TBL_EXPANSION;
Weight1 = CMP_INVALID_WEIGHT;
break;
}
case ( FAREAST_SPECIAL ) :
{
if (sm2 != EXPANSION)
{
//
// Get the weight for the far east special case
// and store it in Weight1.
//
GET_FAREAST_WEIGHT( Weight1,
uw1,
Mask,
lpString1,
pString1,
ExtraWt1,
fModify );
if (sm2 != FAREAST_SPECIAL)
{
//
// The character in the second string is
// NOT a fareast special char.
//
// Set each of weights 4, 5, 6, and 7 to show
// that string 2 is smaller (if not already set).
//
if ((GET_WT_FOUR(&WhichExtra) == 0) &&
(GET_WT_FOUR(&ExtraWt1) != 0))
{
GET_WT_FOUR(&WhichExtra) = CSTR_GREATER_THAN;
}
if ((GET_WT_FIVE(&WhichExtra) == 0) &&
(GET_WT_FIVE(&ExtraWt1) != 0))
{
GET_WT_FIVE(&WhichExtra) = CSTR_GREATER_THAN;
}
if ((GET_WT_SIX(&WhichExtra) == 0) &&
(GET_WT_SIX(&ExtraWt1) != 0))
{
GET_WT_SIX(&WhichExtra) = CSTR_GREATER_THAN;
}
if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
(GET_WT_SEVEN(&ExtraWt1) != 0))
{
GET_WT_SEVEN(&WhichExtra) = CSTR_GREATER_THAN;
}
}
}
break;
}
case ( JAMO_SPECIAL ) :
{
LPWSTR pStr1 = pString1;
LPWSTR pStr2 = pString2;
//
// Set the JamoFlag so we don't handle it again.
//
JamoFlag = TRUE;
FindJamoDifference(
pHashN,
&pStr1, &ctr1, cchCount1, &Weight1,
&pStr2, &ctr2, cchCount2, &Weight2,
&pLastJamo,
&uw1, &uw2,
&State,
&WhichJamo,
fModify );
//
// A non-zero WhichJamo is the final result of the compare.
//
if (WhichJamo)
{
return (WhichJamo);
}
pString1 = pStr1;
pString2 = pStr2;
break;
}
case ( EXTENSION_A ) :
{
//
// Get the full weight in case DW got masked.
//
Weight1 = GET_DWORD_WEIGHT(pHashN, *pString1);
if (sm2 == EXTENSION_A)
{
Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
}
//
// Compare the weights.
//
if (Weight1 == Weight2)
{
//
// Adjust pointers and counters and set flags.
//
pString1++; pString2++;
ctr1--; ctr2--;
Weight1 = CMP_INVALID_WEIGHT;
Weight2 = CMP_INVALID_WEIGHT;
}
else
{
//
// Get the actual UW to compare.
//
if (sm2 == EXTENSION_A)
{
//
// Set the UW values to be the AW and DW since
// both strings contain an extension A char.
//
uw1 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight1),
GET_DIACRITIC(&Weight1),
FALSE );
uw2 = MAKE_UNICODE_WT( GET_ALPHA_NUMERIC(&Weight2),
GET_DIACRITIC(&Weight2),
FALSE );
}
else
{
//
// Only string1 contains an extension A char,
// so set the UW value to be the first UW
// value for extension A (default values):
// SM_EXT_A, AW_EXT_A
//
uw1 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
}
}
break;
}
case ( UNSORTABLE ) :
{
//
// Fill out the case statement so the compiler
// will use a jump table.
//
break;
}
}
//
// Switch on the script member of string 2 and take care
// of any special cases.
//
switch (sm2)
{
case ( NONSPACE_MARK ) :
{
//
// Nonspace only - look at diacritic weight only.
//
if (!fIgnoreDiacritic)
{
if ((WhichDiacritic == 0) ||
(State & STATE_REVERSE_DW))
{
WhichDiacritic = CSTR_LESS_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_DW);
}
}
//
// Adjust pointer and counter and set flags.
//
pString2++;
ctr2--;
Weight2 = CMP_INVALID_WEIGHT;
break;
}
case ( SYMBOL_1 ) :
case ( SYMBOL_2 ) :
case ( SYMBOL_3 ) :
case ( SYMBOL_4 ) :
case ( SYMBOL_5 ) :
{
//
// If the ignore symbol flag is set, then skip over
// the symbol.
//
if (fIgnoreSymbol)
{
pString2++;
ctr2--;
Weight2 = CMP_INVALID_WEIGHT;
}
break;
}
case ( PUNCTUATION ) :
{
//
// If the ignore punctuation flag is set, then
// skip over the punctuation char.
//
if (fIgnorePunct)
{
//
// Advance pointer 2 and decrement counter 2.
//
pString2++;
ctr2--;
Weight2 = CMP_INVALID_WEIGHT;
}
else if (!fStringSort)
{
//
// Use WORD sort method.
//
if (sm1 != PUNCTUATION)
{
//
// The character in the first string is
// NOT punctuation.
//
if (WhichPunct1)
{
//
// Set WP 1 to show that string 1 is
// smaller, since a punctuation char had
// already been found at an earlier
// position in string 1.
//
// Set the Ignore Punctuation flag so we
// just skip over any other punctuation
// chars in the string.
//
WhichPunct1 = CSTR_LESS_THAN;
fIgnorePunct = TRUE;
}
else
{
//
// Set WP 2 to show that string 1 is
// smaller, and that string 2 has had
// a punctuation char - since no
// punctuation chars have been found
// in string 1.
//
WhichPunct2 = CSTR_LESS_THAN;
}
//
// Pointer 2 and counter 2 will be updated
// after if-else statement.
//
}
else
{
//
// Both code points are punctuation chars.
//
// See if either of the strings has encountered
// punctuation chars previous to this.
//
if (WhichPunct1)
{
//
// String 1 has had a punctuation char, so
// it should be the smaller string (since
// both have punctuation chars).
//
WhichPunct1 = CSTR_LESS_THAN;
}
else if (WhichPunct2)
{
//
// String 2 has had a punctuation char, so
// it should be the smaller string (since
// both have punctuation chars).
//
WhichPunct2 = CSTR_GREATER_THAN;
}
else
{
//
// Position is the same, so compare the
// special weights: the alphanumeric weight
// first, then the case weight if those are
// equal.
//
BYTE aw1 = GET_ALPHA_NUMERIC(&Weight1);
BYTE aw2 = GET_ALPHA_NUMERIC(&Weight2);
if (aw1 == aw2)
{
//
// Alphanumeric weights match, so let the
// case weight decide. WhichPunct1 is left
// alone if the case weights match as well.
//
BYTE cw1 = GET_CASE(&Weight1);
BYTE cw2 = GET_CASE(&Weight2);
if (cw1 < cw2)
{
WhichPunct1 = CSTR_LESS_THAN;
} else if (cw1 > cw2)
{
WhichPunct1 = CSTR_GREATER_THAN;
}
} else
{
//
// Alphanumeric weights differ. Set
// WhichPunct1 according to the smaller
// alphanumeric weight.
//
WhichPunct1 = (aw1 < aw2
? CSTR_LESS_THAN
: CSTR_GREATER_THAN);
}
}
//
// Set the Ignore Punctuation flag.
//
fIgnorePunct = TRUE;
//
// Advance pointer 1 and decrement counter 1.
// Pointer 2 and counter 2 will be updated
// after if-else statement.
//
pString1++;
ctr1--;
Weight1 = CMP_INVALID_WEIGHT;
}
//
// Advance pointer 2 and decrement counter 2.
//
pString2++;
ctr2--;
Weight2 = CMP_INVALID_WEIGHT;
}
break;
}
case ( EXPANSION ) :
{
//
// Save pointer in pString2 so that it can be restored.
//
if (pSave2 == NULL)
{
pSave2 = pString2;
}
pString2 = pTmpBuf2;
//
// Add one to counter so that subtraction doesn't end
// comparison prematurely.
//
ctr2++;
//
// Expand character into temporary buffer.
//
pTmpBuf2[0] = GET_EXPANSION_1(&Weight2);
pTmpBuf2[1] = GET_EXPANSION_2(&Weight2);
//
// Set cExpChar2 to the number of expansion characters
// stored.
//
cExpChar2 = MAX_TBL_EXPANSION;
Weight2 = CMP_INVALID_WEIGHT;
break;
}
case ( FAREAST_SPECIAL ) :
{
if (sm1 != EXPANSION)
{
//
// Get the weight for the far east special case
// and store it in Weight2.
//
GET_FAREAST_WEIGHT( Weight2,
uw2,
Mask,
lpString2,
pString2,
ExtraWt2,
fModify );
if (sm1 != FAREAST_SPECIAL)
{
//
// The character in the first string is
// NOT a fareast special char.
//
// Set each of weights 4, 5, 6, and 7 to show
// that string 1 is smaller (if not already set).
//
if ((GET_WT_FOUR(&WhichExtra) == 0) &&
(GET_WT_FOUR(&ExtraWt2) != 0))
{
GET_WT_FOUR(&WhichExtra) = CSTR_LESS_THAN;
}
if ((GET_WT_FIVE(&WhichExtra) == 0) &&
(GET_WT_FIVE(&ExtraWt2) != 0))
{
GET_WT_FIVE(&WhichExtra) = CSTR_LESS_THAN;
}
if ((GET_WT_SIX(&WhichExtra) == 0) &&
(GET_WT_SIX(&ExtraWt2) != 0))
{
GET_WT_SIX(&WhichExtra) = CSTR_LESS_THAN;
}
if ((GET_WT_SEVEN(&WhichExtra) == 0) &&
(GET_WT_SEVEN(&ExtraWt2) != 0))
{
GET_WT_SEVEN(&WhichExtra) = CSTR_LESS_THAN;
}
}
else
{
//
// Characters in both strings are fareast
// special chars.
//
// Set each of weights 4, 5, 6, and 7
// appropriately (if not already set).
//
if ( (GET_WT_FOUR(&WhichExtra) == 0) &&
( GET_WT_FOUR(&ExtraWt1) !=
GET_WT_FOUR(&ExtraWt2) ) )
{
GET_WT_FOUR(&WhichExtra) =
( GET_WT_FOUR(&ExtraWt1) <
GET_WT_FOUR(&ExtraWt2) )
? CSTR_LESS_THAN
: CSTR_GREATER_THAN;
}
if ( (GET_WT_FIVE(&WhichExtra) == 0) &&
( GET_WT_FIVE(&ExtraWt1) !=
GET_WT_FIVE(&ExtraWt2) ) )
{
GET_WT_FIVE(&WhichExtra) =
( GET_WT_FIVE(&ExtraWt1) <
GET_WT_FIVE(&ExtraWt2) )
? CSTR_LESS_THAN
: CSTR_GREATER_THAN;
}
if ( (GET_WT_SIX(&WhichExtra) == 0) &&
( GET_WT_SIX(&ExtraWt1) !=
GET_WT_SIX(&ExtraWt2) ) )
{
GET_WT_SIX(&WhichExtra) =
( GET_WT_SIX(&ExtraWt1) <
GET_WT_SIX(&ExtraWt2) )
? CSTR_LESS_THAN
: CSTR_GREATER_THAN;
}
if ( (GET_WT_SEVEN(&WhichExtra) == 0) &&
( GET_WT_SEVEN(&ExtraWt1) !=
GET_WT_SEVEN(&ExtraWt2) ) )
{
GET_WT_SEVEN(&WhichExtra) =
( GET_WT_SEVEN(&ExtraWt1) <
GET_WT_SEVEN(&ExtraWt2) )
? CSTR_LESS_THAN
: CSTR_GREATER_THAN;
}
}
}
break;
}
case ( JAMO_SPECIAL ) :
{
//
// JamoFlag is set when the string 1 switch above has
// already called FindJamoDifference, so only call it
// here if that did not happen.
//
if (!JamoFlag)
{
LPWSTR pStr1 = pString1;
LPWSTR pStr2 = pString2;
FindJamoDifference(
pHashN,
&pStr1, &ctr1, cchCount1, &Weight1,
&pStr2, &ctr2, cchCount2, &Weight2,
&pLastJamo,
&uw1, &uw2,
&State,
&WhichJamo,
fModify );
if (WhichJamo)
{
return (WhichJamo);
}
pString1 = pStr1;
pString2 = pStr2;
}
else
{
//
// Reset the Jamo flag.
//
JamoFlag = FALSE;
}
break;
}
case ( EXTENSION_A ) :
{
//
// If sm1 is an extension A character, then
// both sm1 and sm2 have been handled. We should
// only get here when either sm1 is not an
// extension A character or the two extension A
// characters are different.
//
if (sm1 != EXTENSION_A)
{
//
// Get the full weight in case DW got masked.
// Also, get the actual UW to compare.
//
// Only string2 contains an extension A char,
// so set the UW value to be the first UW
// value for extension A (default values):
// SM_EXT_A, AW_EXT_A
//
Weight2 = GET_DWORD_WEIGHT(pHashN, *pString2);
uw2 = MAKE_UNICODE_WT(SM_EXT_A, AW_EXT_A, fModify);
}
//
// We should then fall through to the comparison
// of the Unicode weights.
//
break;
}
case ( UNSORTABLE ) :
{
//
// Fill out the case statement so the compiler
// will use a jump table.
//
break;
}
}
//
// See if the comparison should start again.
//
if ((Weight1 == CMP_INVALID_WEIGHT) || (Weight2 == CMP_INVALID_WEIGHT))
{
//
// Check to see if we're modifying the script value.
// If so, then we need to reset the fareast weight
// (if applicable) so that it doesn't get modified
// again.
//
if (fModify == TRUE)
{
if (sm1 == FAREAST_SPECIAL)
{
Weight1 = CMP_INVALID_WEIGHT;
}
else if (sm2 == FAREAST_SPECIAL)
{
Weight2 = CMP_INVALID_WEIGHT;
}
}
continue;
}
//
// We're not supposed to drop down into the state table if
// the unicode weights are different, so stop comparison
// and return result of unicode weight comparison.
//
if (uw1 != uw2)
{
return ((uw1 < uw2) ? CSTR_LESS_THAN : CSTR_GREATER_THAN);
}
}
//
// For each state in the state table, do the appropriate
// comparisons.
//
if (State & (STATE_DW | STATE_REVERSE_DW))
{
//
// Get the diacritic weights.
//
dw1 = GET_DIACRITIC(&Weight1);
dw2 = GET_DIACRITIC(&Weight2);
if (dw1 != dw2)
{
//
// Look ahead to see if diacritic follows a
// minimum diacritic weight. If so, get the
// diacritic weight of the nonspace mark.
//
// Every nonspace mark that directly follows is consumed
// and its diacritic weight is added to the total.
//
while (!AT_STRING_END(ctr1 - 1, pString1 + 1, cchCount1))
{
Wt = GET_DWORD_WEIGHT(pHashN, *(pString1 + 1));
if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
{
dw1 += GET_DIACRITIC(&Wt);
pString1++;
ctr1--;
}
else
{
break;
}
}
while (!AT_STRING_END(ctr2 - 1, pString2 + 1, cchCount2))
{
Wt = GET_DWORD_WEIGHT(pHashN, *(pString2 + 1));
if (GET_SCRIPT_MEMBER(&Wt) == NONSPACE_MARK)
{
dw2 += GET_DIACRITIC(&Wt);
pString2++;
ctr2--;
}
else
{
break;
}
}
//
// Save which string has the smaller diacritic
// weight if the diacritic weights are still
// different.
//
if (dw1 != dw2)
{
WhichDiacritic = (dw1 < dw2)
? CSTR_LESS_THAN
: CSTR_GREATER_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_DW);
}
}
}
if (State & STATE_CW)
{
//
// Get the case weights.
//
if (GET_CASE(&Weight1) != GET_CASE(&Weight2))
{
//
// Save which string has the smaller case weight.
//
WhichCase = (GET_CASE(&Weight1) < GET_CASE(&Weight2))
? CSTR_LESS_THAN
: CSTR_GREATER_THAN;
//
// Remove state from state machine.
//
REMOVE_STATE(STATE_CW);
}
}
}
//
// Fixup the pointers and counters.
//
// NOTE(review): POINTER_FIXUP is defined outside this routine. It
// presumably advances the string pointers and restores them from
// pSave1/pSave2 once an expansion buffer is used up - confirm.
//
POINTER_FIXUP();
ctr1--;
ctr2--;
//
// Reset the weights to be invalid.
//
Weight1 = CMP_INVALID_WEIGHT;
Weight2 = CMP_INVALID_WEIGHT;
}
//
// If the end of BOTH strings has been reached, then the unicode
// weights match exactly. Check the diacritic, case and special
// weights. If all are zero, then return success. Otherwise,
// return the result of the weight difference.
//
// NOTE: The following checks MUST REMAIN IN THIS ORDER:
// Diacritic, Case, Punctuation.
//
if (AT_STRING_END(ctr1, pString1, cchCount1))
{
if (AT_STRING_END(ctr2, pString2, cchCount2))
{
if (WhichDiacritic)
{
return (WhichDiacritic);
}
if (WhichCase)
{
return (WhichCase);
}
if (WhichExtra)
{
//
// Extra weights 4 and 5 are only used when diacritics
// are not being ignored. Weights 6 and 7 are always used.
//
if (!fIgnoreDiacritic)
{
if (GET_WT_FOUR(&WhichExtra))
{
return (GET_WT_FOUR(&WhichExtra));
}
if (GET_WT_FIVE(&WhichExtra))
{
return (GET_WT_FIVE(&WhichExtra));
}
}
if (GET_WT_SIX(&WhichExtra))
{
return (GET_WT_SIX(&WhichExtra));
}
if (GET_WT_SEVEN(&WhichExtra))
{
return (GET_WT_SEVEN(&WhichExtra));
}
}
if (WhichPunct1)
{
return (WhichPunct1);
}
if (WhichPunct2)
{
return (WhichPunct2);
}
return (CSTR_EQUAL);
}
else
{
//
// String 2 is longer.
//
// Move string 2's position into the string 1 variables so
// that the scan below only has to deal with one string.
//
pString1 = pString2;
ctr1 = ctr2;
cchCount1 = cchCount2;
fEnd1 = CSTR_LESS_THAN;
}
}
else
{
//
// String 1 is longer.
//
fEnd1 = CSTR_GREATER_THAN;
}
//
// Scan to the end of the longer string.
//
// NOTE(review): SCAN_LONGER_STRING is defined outside this routine.
// It is the last statement here, so it presumably returns the final
// result, using fEnd1 when the longer string has significant
// characters left - confirm.
//
SCAN_LONGER_STRING( ctr1,
pString1,
cchCount1,
fEnd1 );
}
////////////////////////////////////////////////////////////////////////////
//
//  FindJamoDifference
//
//  Compares two strings across a run of Jamo characters, one Jamo unit at
//  a time.  A unit is either a valid old Hangul Jamo composition (as
//  reported by MapOldHangulSortKey) or a single Jamo.
//
//  Both string positions are first rolled back to the start of the Jamo
//  run (but never before *pLastJamo).  Units are then compared until:
//    - a weight is still CMP_INVALID_WEIGHT (the character is not a
//      Jamo), in which case the caller is asked to restart, or
//    - the Unicode weights of the two units differ, or
//    - the end of either string is reached.
//
//  pHashN               - locale hash node used for the weight lookups
//  ppString1, ppString2 - in/out: current position in each string
//  ctr1, ctr2           - in/out: counters paired with the positions
//                         (decremented as the positions advance)
//  cchCount1, cchCount2 - counts handed to AT_STRING_END
//  pWeight1, pWeight2   - in/out: DWORD weight of the current unit
//  pLastJamo            - in: lower bound for the roll back
//                         out: final position in string 1
//  uw1, uw2             - out: Unicode weight of the current unit
//  pState               - in/out: state table; STATE_JAMO_WEIGHT is
//                         cleared once *WhichJamo has been recorded
//  WhichJamo            - out: CSTR_LESS_THAN / CSTR_EQUAL /
//                         CSTR_GREATER_THAN for the extra Jamo weights
//  fModify              - passed through to MapOldHangulSortKey and
//                         MAKE_UNICODE_WT
//
//  Returns 1 if the caller should restart its comparison at the current
//  positions, 0 otherwise.
//
////////////////////////////////////////////////////////////////////////////

int FindJamoDifference(
    PLOC_HASH pHashN,
    LPCWSTR* ppString1, int* ctr1, int cchCount1, DWORD* pWeight1,
    LPCWSTR* ppString2, int* ctr2, int cchCount2, DWORD* pWeight2,
    LPCWSTR* pLastJamo,
    WORD* uw1,
    WORD* uw2,
    int* pState,
    int* WhichJamo,
    BOOL fModify)
{
    int bRestart = 0;                 // if string compare should restart again
    int oldHangulsFound1 = 0;         // MapOldHangulSortKey result for string 1
                                      //   (used as # of characters consumed)
    int oldHangulsFound2 = 0;         // MapOldHangulSortKey result for string 2
    int JamoCmp;                      // raw memcmp result of the Jamo weights
    WORD UW;
    BYTE JamoWeight1[3];              // extra weight for first old Hangul composition
    BYTE JamoWeight2[3];              // extra weight for second old Hangul composition


    //
    //  Roll back to the first Jamo.  We know that these Jamos in both strings
    //  should be equal, so we can decrement both strings at once.
    //
    while ((*ppString1 > *pLastJamo) && IsJamo(*(*ppString1 - 1)))
    {
        (*ppString1)--;
        (*ppString2)--;
        (*ctr1)++;
        (*ctr2)++;
    }

    //
    //  Now we are at the beginning of two groups of Jamo characters.
    //  Compare Jamo units (either a single Jamo or a valid old Hangul Jamo
    //  composition) until we run out of Jamo units in either string.
    //  The end-of-string test is done at the bottom of the loop, after the
    //  pointers have been advanced.
    //
    for (;;)
    {
        //
        //  Get the weights for the unit in string 1.
        //
        if (IsJamo(**ppString1))
        {
            if (IsLeadingJamo(**ppString1))
            {
                oldHangulsFound1 = MapOldHangulSortKey( pHashN,
                                                        *ppString1,
                                                        *ctr1,
                                                        &UW,
                                                        JamoWeight1,
                                                        fModify );
                if (oldHangulsFound1 > 0)
                {
                    *uw1 = UW;

                    //
                    //  Mark *pWeight1 so that it is not CMP_INVALID_WEIGHT.
                    //  0202 is the DW/CW.
                    //
                    *pWeight1 = ((DWORD)UW | 0x02020000);

                    //
                    //  We always increment ppString1/ctr1 at the end of the
                    //  loop, so we need to subtract 1 here.
                    //
                    *ppString1 += (oldHangulsFound1 - 1);
                    *ctr1 -= (oldHangulsFound1 - 1);
                }
            }

            //
            //  NOTE(review): oldHangulsFound1 is only reset to 0 when the
            //  Jamo weight is recorded below.  Once STATE_JAMO_WEIGHT has
            //  been cleared, a positive count from an earlier unit can
            //  still be set here for a non-leading Jamo, which skips this
            //  branch and ends in the restart path - confirm that this is
            //  intended.
            //
            if (oldHangulsFound1 == 0)
            {
                //
                //  No valid old Hangul compositions are found.  Get the UW
                //  for the Jamo instead.
                //
                *pWeight1 = GET_DWORD_WEIGHT(pHashN, **ppString1);

                //
                //  The SMs in PSORTKEY for Jamos are not really SMs.  They
                //  are all 4 (for JAMO_SPECIAL).
                //  Here we get the real Jamo Unicode weight.  The actual SM
                //  is stored in DW.
                //
                *uw1 = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight1),
                                        GET_ALPHA_NUMERIC(pWeight1),
                                        fModify );
                ((PSORTKEY)pWeight1)->Diacritic = MIN_DW;
            }
        }

        //
        //  Get the weights for the unit in string 2 (same steps as for
        //  string 1, including the stale-count caveat noted there).
        //
        if (IsJamo(**ppString2))
        {
            if (IsLeadingJamo(**ppString2))
            {
                oldHangulsFound2 = MapOldHangulSortKey( pHashN,
                                                        *ppString2,
                                                        *ctr2,
                                                        &UW,
                                                        JamoWeight2,
                                                        fModify );
                if (oldHangulsFound2 > 0)
                {
                    *uw2 = UW;
                    *pWeight2 = ((DWORD)UW | 0x02020000);
                    *ppString2 += (oldHangulsFound2 - 1);
                    *ctr2 -= (oldHangulsFound2 - 1);
                }
            }

            if (oldHangulsFound2 == 0)
            {
                *pWeight2 = GET_DWORD_WEIGHT(pHashN, **ppString2);
                *uw2 = MAKE_UNICODE_WT( GET_DIACRITIC(pWeight2),
                                        GET_ALPHA_NUMERIC(pWeight2),
                                        fModify );
                ((PSORTKEY)pWeight2)->Diacritic = MIN_DW;
            }
        }

        //
        //  See if either weight is invalid.
        //  A weight can be invalid when the character is not a Jamo.
        //  The weight is left as CMP_INVALID_WEIGHT so that the string
        //  comparison can restart within the loop of CompareString().
        //
        if ((*pWeight1 == CMP_INVALID_WEIGHT) ||
            (*pWeight2 == CMP_INVALID_WEIGHT))
        {
            bRestart = 1;
            goto FindJamoDifferenceExit;
        }

        if (*uw1 != *uw2)
        {
            //
            //  Found differences in Unicode weight.  We can stop the
            //  processing now.
            //
            goto FindJamoDifferenceExit;
        }

        //
        //  When we get here, we know that we have the same Unicode Weight.
        //  Check if we need to record the WhichJamo.
        //
        if ((*pState & STATE_JAMO_WEIGHT) &&
            ((oldHangulsFound1 > 0) || (oldHangulsFound2 > 0)))
        {
            if ((oldHangulsFound1 > 0) && (oldHangulsFound2 > 0))
            {
                //
                //  memcmp only guarantees the SIGN of its result, not the
                //  values -1/0/+1, so map the sign explicitly instead of
                //  adding 2 to the raw result.
                //
                JamoCmp = memcmp( JamoWeight1,
                                  JamoWeight2,
                                  sizeof(JamoWeight1) );
                if (JamoCmp < 0)
                {
                    *WhichJamo = CSTR_LESS_THAN;
                }
                else if (JamoCmp > 0)
                {
                    *WhichJamo = CSTR_GREATER_THAN;
                }
                else
                {
                    *WhichJamo = CSTR_EQUAL;
                }
            }
            else if (oldHangulsFound1 > 0)
            {
                //
                //  Only string 1 has an old Hangul composition.
                //
                *WhichJamo = CSTR_GREATER_THAN;
            }
            else
            {
                //
                //  Only string 2 has an old Hangul composition.
                //
                *WhichJamo = CSTR_LESS_THAN;
            }

            //
            //  Remove state from state machine.
            //
            *pState &= ~STATE_JAMO_WEIGHT;
            oldHangulsFound1 = oldHangulsFound2 = 0;
        }

        //
        //  Move on to the next unit in both strings.
        //
        (*ppString1)++;
        (*ctr1)--;
        (*ppString2)++;
        (*ctr2)--;

        //
        //  Stop at the end of either string.  The weights are deliberately
        //  NOT reset on this path, so the caller still sees the weights of
        //  the last unit compared.
        //
        if (AT_STRING_END(*ctr1, *ppString1, cchCount1) ||
            AT_STRING_END(*ctr2, *ppString2, cchCount2))
        {
            break;
        }

        *pWeight1 = *pWeight2 = CMP_INVALID_WEIGHT;
    }

    //
    //  If we drop out of the loop because we reached the end of a string,
    //  decrement the pointers by one because loops in CompareString() will
    //  increase the pointers at the end of the loop.
    //
    //  If we drop out of the loop because of the goto's in it, we are
    //  already off by one.
    //
    if (AT_STRING_END(*ctr1, *ppString1, cchCount1))
    {
        (*ppString1)--;
        (*ctr1)++;
    }
    if (AT_STRING_END(*ctr2, *ppString2, cchCount2))
    {
        (*ppString2)--;
        (*ctr2)++;
    }

FindJamoDifferenceExit:
    //
    //  Remember where we stopped so that the next call does not roll back
    //  past this position.
    //
    *pLastJamo = *ppString1;
    return (bRestart);
}