windows-nt/Source/XPSP1/NT/base/win32/winnls/jamo.c
2020-09-26 16:20:57 +08:00

401 lines
13 KiB
C

/*++
Copyright (c) 1991-2000, Microsoft Corporation All rights reserved.
Module Name:
jamo.c
Abstract:
This file contains functions that deal with the sorting of old Hangul.
Korean characters (Hangul) can be composed by Jamos (U+1100 - U+11ff).
However, some valid compositions of Jamo are not found in modern
Hangul (U+AC00 - U+D7AF).
These valid compositions are called old Hangul.
MapOldHangulSortKey() is called by CompareString() and MapSortKey() to
handle the sorting of old Hangul.
Note:
The Jamo composition means that several Jamo (Korean alphabetic characters)
compose a valid Hangul character or old Hangul character.
Eg. U+1100 U+1103 U+1161 U+11a8 composes a valid old Hangul character.
The following are data members of the global structure pTblPtrs used by
old Hangul sorting:
* pTblPtrs->pJamoIndex
Given a Jamo, this is the index into the pJamoComposition state
machine for this Jamo.
The value for U+1100 is stored in pJamoIndex[0], U+1101 is in
pJamoIndex[1], etc.
The value for U+1100 is 1. This means the state machine for
U+1100 is stored in pJamoComposition[1].
Note that not every Jamo can start a valid composition. For
those Jamos that can not start a valid composition, the table
entry for that Jamo is 0. E.g. the index for U+1101 is 0.
* pTblPtrs->NumJamoIndex
The number of entries in pJamoIndex. Every index is a WORD.
* pTblPtrs->pJamoComposition
This is the Jamo composition state machine. It is used for two
purposes:
1. Used to verify a valid Jamo combination that composes an
old Hangul character.
2. If a valid old Hangul composition is found, get the
SortInfo for the current combination.
* pTblPtrs->NumJamoComposition
The number of entries in pJamoComposition
Revision History:
05-30-2000 JohnMcCo Create old Hangul sorting algorithm and sample.
06-23-2000 YSLin Created.
--*/
//
// Include Files.
//
#include "nls.h"
#include "jamo.h"
//-------------------------------------------------------------------------//
// INTERNAL MACROS //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
// NOT_END_STRING
//
// Checks to see if the search has reached the end of the string.
// It evaluates to non-zero if the counter is not at zero (counting
// backwards) and the null termination has not been reached (the null
// terminator only ends the string if -2 was passed in the count parameter).
//
// ct     remaining character counter
// ptr    pointer to the current character
// cchIn  the character count originally passed in (-2 means the string
//        is null-terminated)
//
// Every parameter is parenthesized in the expansion so that an argument
// containing lower-precedence operators (e.g. a conditional expression)
// is compared as a whole.
//
// 11-04-92 JulieB Created.
////////////////////////////////////////////////////////////////////////////
#define NOT_END_STRING(ct, ptr, cchIn) \
(((ct) != 0) && (!((*(ptr) == 0) && ((cchIn) == -2))))
////////////////////////////////////////////////////////////////////////////
//
// GET_JAMO_INDEX
//
// Converts a Jamo code point into its zero-based offset from
// NLS_CHAR_FIRST_JAMO.  GetJamoComposition uses the result as the element
// offset into pTblPtrs->pJamoIndex.
//
// The macro does not range-check wch; the caller must pass a Jamo.
//
////////////////////////////////////////////////////////////////////////////
#define GET_JAMO_INDEX(wch) ((wch) - NLS_CHAR_FIRST_JAMO)
//-------------------------------------------------------------------------//
// INTERNAL ROUTINES //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
// UpdateJamoState
//
// Folds the sort information of one state (pSort) into the running result
// (pSortResult).  Values only ever grow: the "old Hangul" flag is OR-ed in,
// and each index / weight is raised to the larger of the two values.
//
// JamoClass    The current Jamo class (NLS_CLASS_LEADING_JAMO,
//              NLS_CLASS_VOWEL_JAMO or NLS_CLASS_TRAILING_JAMO).  It selects
//              which extra weight of the result receives m_ExtraWeight;
//              any other value leaves the extra weights untouched.
// pSort        The sort information derived from the current state.
// pSortResult  The sort information for the final result.  Used to
//              collect info from pSort.
//
// 06-22-2000 YSLin Created.
////////////////////////////////////////////////////////////////////////////
void UpdateJamoState(
    int JamoClass,
    PJAMO_SORT_INFO pSort,
    PJAMO_SORT_INFOEX pSortResult)  // new sort sequence information
{
    //
    // Once a Jamo unique to old Hangul has been seen, the flag stays set.
    //
    pSortResult->m_bOld |= pSort->m_bOld;

    //
    // Keep the highest leading / vowel / trailing index seen so far.
    //
    if (pSortResult->m_chLeadingIndex < pSort->m_chLeadingIndex)
    {
        pSortResult->m_chLeadingIndex = pSort->m_chLeadingIndex;
    }
    if (pSortResult->m_chVowelIndex < pSort->m_chVowelIndex)
    {
        pSortResult->m_chVowelIndex = pSort->m_chVowelIndex;
    }
    if (pSortResult->m_chTrailingIndex < pSort->m_chTrailingIndex)
    {
        pSortResult->m_chTrailingIndex = pSort->m_chTrailingIndex;
    }

    //
    // The single extra weight in pSort belongs to whichever class is being
    // processed; keep the highest one seen for that class.
    //
    if (JamoClass == NLS_CLASS_LEADING_JAMO)
    {
        if (pSortResult->m_LeadingWeight < pSort->m_ExtraWeight)
        {
            pSortResult->m_LeadingWeight = pSort->m_ExtraWeight;
        }
    }
    else if (JamoClass == NLS_CLASS_VOWEL_JAMO)
    {
        if (pSortResult->m_VowelWeight < pSort->m_ExtraWeight)
        {
            pSortResult->m_VowelWeight = pSort->m_ExtraWeight;
        }
    }
    else if (JamoClass == NLS_CLASS_TRAILING_JAMO)
    {
        if (pSortResult->m_TrailingWeight < pSort->m_ExtraWeight)
        {
            pSortResult->m_TrailingWeight = pSort->m_ExtraWeight;
        }
    }
}
////////////////////////////////////////////////////////////////////////////
//
// GetJamoComposition
//
// Consumes a run of Jamo that all belong to currentJamoClass, starting at
// *ppString, and accumulates their sort information into JamoSortInfo.
// The first character is taken unconditionally; each following character
// of the same class must match a transition in the pJamoComposition state
// machine to be consumed.
//
// ppString          pointer to the current Jamo character; advanced past
//                   every character that is consumed
// pCount            pointer to the current character count (counting
//                   backwards)
// cchSrc            the total character count (if the value is -2, then
//                   the string is null-terminated)
// currentJamoClass  the current Jamo class
// JamoSortInfo      the sort information for the final result
//
// Returns -1 if the end of the string or a non-Jamo character stopped the
// scan.  Otherwise returns the class of the Jamo that stopped the scan;
// this equals currentJamoClass when a Jamo of the same class could not
// extend the composition.  The stopping character is not consumed.
//
// NOTENOTE The character at *ppString is not validated here.  The original
// note says it is assumed to be a leading Jamo; MapOldHangulSortKey also
// calls this routine for vowel and trailing runs, after the previous call
// reported that class.
//
// 06-12-2000 YSLin Created.
////////////////////////////////////////////////////////////////////////////
int GetJamoComposition(
    LPCWSTR* ppString,              // The pointer to the current character
    int* pCount,                    // The current character count
    int cchSrc,                     // The total character length
    int currentJamoClass,           // The current Jamo class.
    JAMO_SORT_INFOEX* JamoSortInfo  // The result Jamo sorting information.
    )
{
    WCHAR ch;
    int nextClass;
    int firstState;
    int fMatched;
    PJAMO_TABLE pEntry;
    PJAMO_COMPOSE_STATE pState = NULL;   // NULL until the first lookup
    PJAMO_COMPOSE_STATE pLimit = NULL;   // end of the transitions to search

    //
    // Fold in the table entry of the first character of the run.
    //
    ch = **ppString;
    pEntry = pTblPtrs->pJamoIndex + GET_JAMO_INDEX(ch);
    UpdateJamoState(currentJamoClass, &(pEntry->SortInfo), JamoSortInfo);

    //
    // Step over the first character.
    // NOTE(review): *pCount is not decremented for this character, unlike
    // for the characters consumed in the loop below.  Whether that is
    // intended depends on the count convention used by the callers --
    // TODO confirm.
    //
    (*ppString)++;

    while (NOT_END_STRING(*pCount, *ppString, cchSrc))
    {
        ch = **ppString;

        //
        // Anything that is not a Jamo ends the composition.
        //
        if (!IsJamo(ch))
        {
            return (-1);
        }

        //
        // 0x1160 is recorded as a filler (U+1160 is HANGUL JUNGSEONG FILLER
        // in Unicode); MapOldHangulSortKey sorts such compositions
        // differently.
        //
        if (ch == 0x1160)
        {
            JamoSortInfo->m_bFiller = TRUE;
        }

        //
        // Classify the character; anything that is neither leading nor
        // trailing is treated as a vowel.
        //
        nextClass = IsLeadingJamo(ch)
                        ? NLS_CLASS_LEADING_JAMO
                        : (IsTrailingJamo(ch)
                               ? NLS_CLASS_TRAILING_JAMO
                               : NLS_CLASS_VOWEL_JAMO);

        //
        // A change of class ends this run; report the new class.
        //
        if (nextClass != currentJamoClass)
        {
            return (nextClass);
        }

        //
        // On the first same-class character, locate the transitions for
        // the Jamo that started the run.  An index of 0 means that Jamo
        // cannot start a composition.
        //
        if (pState == NULL)
        {
            firstState = pEntry->Index;
            if (firstState == 0)
            {
                return (nextClass);
            }
            pState = pTblPtrs->pJamoComposition + firstState;
            pLimit = pState + pEntry->TransitionCount;
        }

        //
        // Look for a transition on the current character.  A mismatching
        // entry is skipped together with the m_bTransitionCount entries
        // that follow it.
        //
        fMatched = 0;
        while (!fMatched && (pState < pLimit))
        {
            if (pState->m_wcCodePoint == ch)
            {
                UpdateJamoState(currentJamoClass, &(pState->m_SortInfo), JamoSortInfo);
                fMatched = 1;
                pState++;
            }
            else
            {
                pState += pState->m_bTransitionCount + 1;
            }
        }

        //
        // No transition: the character does not extend a valid old Hangul
        // composition, so stop without consuming it.
        //
        if (!fMatched)
        {
            return (nextClass);
        }

        //
        // Still inside a valid composition; consume the character.
        //
        (*ppString)++;
        (*pCount)--;
    }

    return (-1);
}
//-------------------------------------------------------------------------//
// EXTERNAL ROUTINES //
//-------------------------------------------------------------------------//
////////////////////////////////////////////////////////////////////////////
//
// MapOldHangulSortKey
//
// Check if the given string starts with a valid old Hangul composition.
// If yes, store the sortkey weights for the composition in pUW / pXW and
// return the number of characters (WCHARs) consumed by the composition.
// If not, return zero and leave pUW / pXW untouched.
//
// pHashN   locale hash node; its pSortkey table supplies the Unicode weight
// pSrc     source string
// cchSrc   the length of the string (passed through to GetJamoComposition,
//          where -2 means the string is null-terminated)
// pUW      receives the generated Unicode weight
// pXW      receives the generated extra weights (3 bytes: leading, vowel,
//          trailing)
// fModify  passed to GET_UNICODE_MOD when reading the Unicode weight
//
// NOTENOTE: This function assumes that string starting from pSrc is a
// leading Jamo.
//
// 06-12-2000 YSLin Created.
////////////////////////////////////////////////////////////////////////////
int MapOldHangulSortKey(
    PLOC_HASH pHashN,
    LPCWSTR pSrc,       // source string
    int cchSrc,         // the length of the string
    WORD* pUW,          // generated Unicode weight
    LPBYTE pXW,         // generated extra weight (3 bytes)
    BOOL fModify)
{
    LPCWSTR pString = pSrc;
    JAMO_SORT_INFOEX JamoSortInfo;   // The result Jamo information.
    int Count = cchSrc;
    PSORTKEY pWeight;
    int JamoClass;                   // The current Jamo class.
    WCHAR wchModernHangul;

    RtlZeroMemory(&JamoSortInfo, sizeof(JamoSortInfo));

    //
    // Scan the leading run, then the vowel run and the trailing run, each
    // only if the previous scan stopped on a Jamo of that class.
    //
    JamoClass = GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_LEADING_JAMO, &JamoSortInfo);
    if (JamoClass == NLS_CLASS_VOWEL_JAMO)
    {
        JamoClass = GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_VOWEL_JAMO, &JamoSortInfo);
    }
    if (JamoClass == NLS_CLASS_TRAILING_JAMO)
    {
        GetJamoComposition(&pString, &Count, cchSrc, NLS_CLASS_TRAILING_JAMO, &JamoSortInfo);
    }

    //
    // Only compositions flagged as old Hangul get a sort key here;
    // anything else is left for the caller to handle.
    //
    if (!JamoSortInfo.m_bOld)
    {
        return (0);
    }

    //
    // Compute the modern Hangul syllable prior to this composition.
    // Uses formula from Unicode 3.0 Section 3.11 p54
    // "Hangul Syllable Composition".
    //
    wchModernHangul =
        (JamoSortInfo.m_chLeadingIndex * NLS_JAMO_VOWEL_COUNT + JamoSortInfo.m_chVowelIndex) * NLS_JAMO_TRAILING_COUNT
        + JamoSortInfo.m_chTrailingIndex
        + NLS_HANGUL_FIRST_SYLLABLE;

    if (JamoSortInfo.m_bFiller)
    {
        // Sort before the modern Hangul, instead of after.
        wchModernHangul--;

        // If we fall off the modern Hangul syllable block,...
        if (wchModernHangul < NLS_HANGUL_FIRST_SYLLABLE)
        {
            // Sort after the previous character (Circled Hangul Kiyeok A)
            wchModernHangul = 0x326e;
        }

        // Shift the leading weight past any old Hangul that sorts after this modern Hangul
        JamoSortInfo.m_LeadingWeight += 0x80;
    }

    //
    // The Unicode weight comes from the anchoring modern character; the
    // three extra weights order old Hangul relative to that anchor.
    //
    pWeight = &((pHashN->pSortkey)[wchModernHangul]);
    *pUW = GET_UNICODE_MOD(pWeight, fModify);
    pXW[0] = JamoSortInfo.m_LeadingWeight;
    pXW[1] = JamoSortInfo.m_VowelWeight;
    pXW[2] = JamoSortInfo.m_TrailingWeight;

    return (int)(pString - pSrc);
}