697 lines
22 KiB
C
697 lines
22 KiB
C
|
|
||
|
/*************************************************************************
|
||
|
* *
|
||
|
* STEM.C *
|
||
|
* *
|
||
|
* Copyright (C) Microsoft Corporation 1990-1994 *
|
||
|
* All Rights reserved. *
|
||
|
* *
|
||
|
**************************************************************************
|
||
|
* *
|
||
|
* Module Intent *
|
||
|
* This module contains the functions to strip off the suffix of a word *
|
||
|
* It is based on the research paper of Dr. Porter, published in *
|
||
|
* An algorithm for suffix stripping *
|
||
|
* Program, Vol.14, no.3,pp 130-137, July 1980 *
|
||
|
* *
|
||
|
* Description: *
|
||
|
* *
|
||
|
* The full description of the algorithm can be found in that document *
|
||
|
* Basically, the algorithm consists of: *
|
||
|
* - Matching the suffix from a table of suffixes *
|
||
|
* - Applies the rule that comes with the suffix *
|
||
|
* - If the rule matches, then change the suffix to the new one *
|
||
|
* *
|
||
|
* Comments: *
|
||
|
* *
|
||
|
* 1/ There are some misconceptions about stripping the suffix *
|
||
|
* People are thinking in term of super-smart algorithm that can *
|
||
|
* strip a word to its stem. The fact is that it is not necessarily *
|
||
|
* true. For example, DIED is stripped to DI, but not DIE. *
|
||
|
* *
|
||
|
* 2/ The current code is SLOW, but it is easy to understand in terms *
|
||
|
* of implementation, since it is straightforward from the algorithm *
|
||
|
* description. The impact on runtime is nothing. On compiled time *
|
||
|
* stemming 5,000,000 words will take less than 1 hour, which is *
|
||
|
* acceptable, since a project that large requires 1-2 days to *
|
||
|
* compile. *
|
||
|
* *
|
||
|
* To improve the speed (up to 2 times), we can scan the suffix *
|
||
|
* if one letter doesn't match we can jump past all stems that have *
|
||
|
* this letter *
|
||
|
* WARNING: Tab setting is 4 for this file *
|
||
|
* *
|
||
|
**************************************************************************
|
||
|
* *
|
||
|
* Current Owner: BinhN *
|
||
|
* *
|
||
|
**************************************************************************/
|
||
|
#include <mvopsys.h>
|
||
|
#include <memory.h>
|
||
|
#include <mvsearch.h>
|
||
|
#include "common.h"
|
||
|
|
||
|
|
||
|
/* Letter classes stored in CharTypeTab and in the per-word type buffer. */
#define VOWEL       0   /* a, e, i, o, u */
#define CONSONANT   1   /* every other letter except 'y' */
#define MIXED       2   /* 'y': resolved to VOWEL or CONSONANT by MarkType */

/* Words (and intermediate stems) of this many bytes or fewer are not stemmed */
#define MIN_LENGTH_FOR_STEM 3
|
||
|
|
||
|
/* Rule table structure */
|
||
|
|
||
|
typedef struct RULE
|
||
|
{
|
||
|
LPB szInitSuffix; // Initial suffix
|
||
|
LPB szNewSuffix; // New suffix
|
||
|
LPB szCondition; // Stemming condition
|
||
|
short NextTable; // Next table to jump to
|
||
|
} RULE, FAR *LPRULE;
|
||
|
|
||
|
/* The conventional letter used for the stemming condition are:
|
||
|
*
|
||
|
* '1': Measure == 1
|
||
|
* '2': Measure > 1
|
||
|
* 'd': Double consonant at the end (*d in the document)
|
||
|
* 'o': Form cvc , and 2nd c is not W, X or Y (*o in the document)
|
||
|
* 'p': Measure > 0
|
||
|
* 's': Remove the last consonant (used with 'd')
|
||
|
* 'v': Word contains vowels (*v* in the document)
|
||
|
* '*': Terminated with the next letter (*S in the document)
|
||
|
* '&': AND operation
|
||
|
* '|': OR operation
|
||
|
* '!': NOT operation
|
||
|
* The rule operation is based on a postfix notation, so "m=1 and *o*" is
|
||
|
* described as "1o&"
|
||
|
*/
|
||
|
|
||
|
RULE RuleTab0[] =
|
||
|
{
|
||
|
"\4sses", "\2ss", NULL, 1,
|
||
|
"\3ies", "\1i", NULL, 1,
|
||
|
"\2ss", "\2ss", NULL, 1,
|
||
|
"\1s", "\0", NULL, 1,
|
||
|
NULL, NULL, NULL, 1,
|
||
|
};
|
||
|
|
||
|
RULE RuleTab1[] =
|
||
|
{
|
||
|
"\3eed", "\2ee", "p", 3,
|
||
|
"\2ed", "\0", "v", 2,
|
||
|
"\3ing", "\0", "v", 2,
|
||
|
NULL, NULL, NULL, 3,
|
||
|
};
|
||
|
|
||
|
RULE RuleTab2[] =
|
||
|
{
|
||
|
"\2at", "\3ate", NULL, 3,
|
||
|
"\2bl", "\3ble", NULL, 3,
|
||
|
"\2iz", "\3ize", NULL, 3,
|
||
|
|
||
|
/* The following szNewSuffix has a negative \377
|
||
|
* (-1) length. It is to be used to reduce a
|
||
|
* double consonant ending to single consonant
|
||
|
*/
|
||
|
|
||
|
"\0", "\377\0", "*l*s|*z|!d&s", 3,
|
||
|
"\0", "\1e", "1o&", 3,
|
||
|
NULL, NULL, NULL, 3,
|
||
|
};
|
||
|
|
||
|
RULE RuleTab3[] =
|
||
|
{
|
||
|
"\1y", "\1i", "v", 4,
|
||
|
NULL, NULL, NULL, 4,
|
||
|
};
|
||
|
|
||
|
RULE RuleTab4[] =
|
||
|
{
|
||
|
"\7ational", "\3ate", "p", 5,
|
||
|
"\6tional", "\4tion", "p", 5,
|
||
|
"\4enci", "\4ence", "p", 5,
|
||
|
"\4anci", "\4ance", "p", 5,
|
||
|
"\4izer", "\3ize", "p", 5,
|
||
|
"\4abli", "\4able", "p", 5,
|
||
|
"\4alli", "\2al", "p", 5,
|
||
|
"\5entli", "\3ent", "p", 5,
|
||
|
"\3eli", "\1e", "p", 5,
|
||
|
"\5ousli", "\3ous", "p", 5,
|
||
|
"\7ization", "\3ize", "p", 5,
|
||
|
"\5ation", "\3ate", "p", 5,
|
||
|
"\4ator", "\3ate", "p", 5,
|
||
|
"\5alism", "\2al", "p", 5,
|
||
|
"\7iveness", "\3ive", "p", 5,
|
||
|
"\7fulness", "\3ful", "p", 5,
|
||
|
"\7ousness", "\3ous", "p", 5,
|
||
|
"\5aliti", "\2al", "p", 5,
|
||
|
"\5iviti", "\3ive", "p", 5,
|
||
|
"\6biliti", "\3ble", "p", 5,
|
||
|
NULL, NULL, NULL, 5,
|
||
|
};
|
||
|
|
||
|
RULE RuleTab5[] =
|
||
|
{
|
||
|
"\5icate", "\2ic", "p", 6,
|
||
|
"\5ative", "\0", "p", 6,
|
||
|
"\5alize", "\2al", "p", 6,
|
||
|
"\5iciti", "\2ic", "p", 6,
|
||
|
"\4ical", "\2ic", "p", 6,
|
||
|
"\3ful", "\0", "p", 6,
|
||
|
"\4ness", "\0", "p", 6,
|
||
|
NULL, NULL, NULL, 6,
|
||
|
};
|
||
|
|
||
|
RULE RuleTab6[] =
|
||
|
{
|
||
|
"\2al", "\0", "2", 7,
|
||
|
"\4ance", "\0", "2", 7,
|
||
|
"\4ence", "\0", "2", 7,
|
||
|
"\2er", "\0", "p", 7,
|
||
|
"\2ic", "\0", "2", 7,
|
||
|
"\4able", "\0", "2", 7,
|
||
|
"\4ible", "\0", "2", 7,
|
||
|
"\3ant", "\0", "2", 7,
|
||
|
"\5ement", "\0", "2", 7,
|
||
|
"\4ment", "\0", "2", 7,
|
||
|
"\3ent", "\0", "2", 7,
|
||
|
"\3ion", "\0", "2*s*t|&", 7,
|
||
|
"\2ou", "\0", "2", 7,
|
||
|
"\3ism", "\0", "2", 7,
|
||
|
"\3ate", "\0", "2", 7,
|
||
|
"\3iti", "\0", "2", 7,
|
||
|
"\3ous", "\0", "2", 7,
|
||
|
"\3ive", "\0", "2", 7,
|
||
|
"\3ize", "\0", "2", 7,
|
||
|
NULL, NULL, NULL, 7,
|
||
|
};
|
||
|
|
||
|
|
||
|
RULE RuleTab7[] =
|
||
|
{
|
||
|
"\1e", "\0", "2", 8,
|
||
|
"\1e", "\0", "1o!&", 8,
|
||
|
NULL, NULL, NULL, 8,
|
||
|
};
|
||
|
|
||
|
RULE RuleTab8[] =
|
||
|
{
|
||
|
"\2ll", "\1l", "2", 9,
|
||
|
"\0", "\377\0", "2*l&d&s", 9,
|
||
|
NULL, NULL, NULL, 9,
|
||
|
};
|
||
|
|
||
|
char CharTypeTab[] =
|
||
|
{
|
||
|
VOWEL, //a
|
||
|
CONSONANT, //b
|
||
|
CONSONANT, //c
|
||
|
CONSONANT, //d
|
||
|
VOWEL, //e
|
||
|
CONSONANT, //f
|
||
|
CONSONANT, //g
|
||
|
CONSONANT, //h
|
||
|
VOWEL, //i
|
||
|
CONSONANT, //j
|
||
|
CONSONANT, //k
|
||
|
CONSONANT, //l
|
||
|
CONSONANT, //m
|
||
|
CONSONANT, //n
|
||
|
VOWEL, //o
|
||
|
CONSONANT, //p
|
||
|
CONSONANT, //q
|
||
|
CONSONANT, //r
|
||
|
CONSONANT, //s
|
||
|
CONSONANT, //t
|
||
|
VOWEL, //u
|
||
|
CONSONANT, //v
|
||
|
CONSONANT, //w
|
||
|
CONSONANT, //x
|
||
|
MIXED, //y, consonant, but may be vowel if after consonant
|
||
|
CONSONANT, //z
|
||
|
};
|
||
|
|
||
|
LPRULE RuleTables[] =
|
||
|
{
|
||
|
RuleTab0,
|
||
|
RuleTab1,
|
||
|
RuleTab2,
|
||
|
RuleTab3,
|
||
|
RuleTab4,
|
||
|
RuleTab5,
|
||
|
RuleTab6,
|
||
|
RuleTab7,
|
||
|
RuleTab8,
|
||
|
NULL,
|
||
|
};
|
||
|
|
||
|
/*************************************************************************
|
||
|
*
|
||
|
* INTERNAL PRIVATE FUNCTIONS
|
||
|
* All of them should be declared near
|
||
|
*************************************************************************/
|
||
|
int PRIVATE PASCAL NEAR MeasureCalc (LPB, int);
|
||
|
int PRIVATE PASCAL NEAR ConditionMet (LPB, LPB, LPB, int);
|
||
|
int PRIVATE PASCAL NEAR SuffixMatch (LPB lpbWord, LPB lpSuffix);
|
||
|
HRESULT PRIVATE PASCAL NEAR MarkType (LPB, LPB, int);
|
||
|
|
||
|
|
||
|
/*************************************************************************
|
||
|
*
|
||
|
* @doc API INDEX RETRIEVAL
|
||
|
*
|
||
|
* @func HRESULT PASCAL FAR | FStem |
|
||
|
* This function will strip the suffix from a word, ie, "stem" it
|
||
|
*
|
||
|
* @parm LPB | lpbStemWord |
|
||
|
* Buffer to contain the stemmed word
|
||
|
*
|
||
|
* @parm LPB | lpbWord |
|
||
|
* Word to be stemmed
|
||
|
*
|
||
|
* @rdesc S_OK if succeeded, or E_INVALIDARG if the null argument is
|
||
|
* passed
|
||
|
*
|
||
|
* @comm The word passed must have all the letters in lower case for
|
||
|
* The function to work with. WARNING: There is no checking about
|
||
|
* case, so thing can go wrong if the word contains upper case letter
|
||
|
* or non alphabetic letter.
|
||
|
*
|
||
|
*************************************************************************/
|
||
|
|
||
|
PUBLIC HRESULT PASCAL FAR EXPORT_API FStem (LPB lpbStemWord, LPB lpbWord)
|
||
|
{
|
||
|
register int wLength; // Length of the word
|
||
|
register int i; // Scratch variable
|
||
|
LPRULE lpRuleTab; // Pointer to rule table
|
||
|
LPRULE lpRule; // Pointer to rule
|
||
|
int wLengthSaved;
|
||
|
int wNewSuffixLength; // This must be signed!
|
||
|
int wInitSuffixLength;
|
||
|
char lpbWordType [CB_MAX_WORD_LEN];
|
||
|
LPB szInitSuffix;
|
||
|
LPB szNewSuffix;
|
||
|
int TableIndex; // For debugging purpose only
|
||
|
int RuleIndex; // For debugging purpose only
|
||
|
LPB lpbTmp;
|
||
|
|
||
|
if (lpbWord == NULL)
|
||
|
return E_INVALIDARG;
|
||
|
|
||
|
wLength = (*(LPW)lpbWordType = *((LPW)lpbWord));
|
||
|
if (wLength >= CB_MAX_WORD_LEN)
|
||
|
return(E_WORDTOOLONG);
|
||
|
|
||
|
|
||
|
/* Copy the word over */
|
||
|
MEMCPY (lpbStemWord, lpbWord, wLength + 2);
|
||
|
|
||
|
/* Don't do any stemming for words <= 3 bytes */
|
||
|
if (wLength <= MIN_LENGTH_FOR_STEM)
|
||
|
return S_OK;
|
||
|
|
||
|
/* Mark the type of each letter to be consonant or vowel */
|
||
|
if (MarkType (lpbStemWord+2, lpbWordType+2, wLength) != S_OK)
|
||
|
{
|
||
|
/* We got some non alphabetic characters. Just return */
|
||
|
return S_OK;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Traverse all the tables and check for stemming conditions */
|
||
|
|
||
|
for (TableIndex = 0, lpRuleTab = RuleTables[0]; lpRuleTab;)
|
||
|
{
|
||
|
|
||
|
/* Check for each rule */
|
||
|
|
||
|
for (RuleIndex = 0, lpRule = lpRuleTab;
|
||
|
szInitSuffix = lpRule->szInitSuffix; lpRule++, RuleIndex++)
|
||
|
{
|
||
|
|
||
|
szNewSuffix = lpRule->szNewSuffix;
|
||
|
|
||
|
/* The casting is needed to make wNewSuffixLength signed */
|
||
|
wNewSuffixLength = (char)*szNewSuffix++;
|
||
|
|
||
|
wInitSuffixLength = (char)*szInitSuffix++;
|
||
|
|
||
|
/* Check for condition match */
|
||
|
|
||
|
if (wLength >= wInitSuffixLength)
|
||
|
{
|
||
|
|
||
|
lpbTmp = lpbStemWord + wLength + 2 - wInitSuffixLength;
|
||
|
/* Compare the suffixes */
|
||
|
|
||
|
for (i = wInitSuffixLength;
|
||
|
i > 0 && (*lpbTmp == *szInitSuffix);
|
||
|
i--, lpbTmp++, szInitSuffix++);
|
||
|
|
||
|
/* Restore szInitSuffix */
|
||
|
szInitSuffix = lpRule->szInitSuffix;
|
||
|
|
||
|
if (i != 0) // String comparison fails
|
||
|
continue;
|
||
|
|
||
|
/* Save the word length */
|
||
|
wLengthSaved = wLength;
|
||
|
|
||
|
/* Update word length since we don't include the suffix
|
||
|
* length in our computation
|
||
|
*/
|
||
|
wLength -= wInitSuffixLength;
|
||
|
|
||
|
|
||
|
/* Now check the stemming condition */
|
||
|
|
||
|
if (ConditionMet (lpbStemWord, lpbWordType,
|
||
|
lpRule->szCondition, wLength))
|
||
|
{
|
||
|
|
||
|
/* Rule applies, change to the new suffix */
|
||
|
|
||
|
if (wNewSuffixLength > 0)
|
||
|
{
|
||
|
|
||
|
MEMCPY (&lpbStemWord[wLength+2], szNewSuffix,
|
||
|
wNewSuffixLength);
|
||
|
|
||
|
/* Update the word type */
|
||
|
|
||
|
MarkType (szNewSuffix,
|
||
|
lpbWordType + wLength + 2, wNewSuffixLength);
|
||
|
}
|
||
|
|
||
|
/* Update the word length
|
||
|
* The check for wLength is necessary since we don't
|
||
|
* want to strip evething
|
||
|
*/
|
||
|
|
||
|
if (wLength + wNewSuffixLength > 0)
|
||
|
*(LPW)lpbStemWord = (wLength += wNewSuffixLength);
|
||
|
|
||
|
if (wLength <= MIN_LENGTH_FOR_STEM)
|
||
|
goto Done;
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
|
||
|
/* Rule doesn't apply, Restore the word length */
|
||
|
wLength = wLengthSaved;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Go to the next table */
|
||
|
lpRuleTab = RuleTables [TableIndex = lpRule->NextTable];
|
||
|
}
|
||
|
|
||
|
Done:
|
||
|
lpbStemWord[*((LPW)lpbStemWord)+2] = 0;
|
||
|
return S_OK;
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
*
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func int PASCAL NEAR | MeasureCalc |
|
||
|
* Calculate the measure of a word. The measure is defined as
|
||
|
* the pair (VC), where V is the vowels, and C consonants. A word
|
||
|
* is described as [C](VC)m[V], where the first C and the last V are
|
||
|
* optional. m is the measure of the word (or part of word without
|
||
|
* the suffix). Example:
|
||
|
* architect: m = 3 (arch, it, ect)
|
||
|
* convention: m = 3 (onv, ent, ion)
|
||
|
* lie: m = 0, since the first consonant, and the last vowels
|
||
|
* don't count
|
||
|
*
|
||
|
* @parm LPB | lpbWordType |
|
||
|
* Buffer containing word type
|
||
|
*
|
||
|
* @parm int | wLength |
|
||
|
* The length of the word
|
||
|
*
|
||
|
* @rdesc Return the measure of the word
|
||
|
*
|
||
|
*************************************************************************/
|
||
|
|
||
|
int PRIVATE PASCAL NEAR MeasureCalc (LPB lpbWordType, register int wLength)
|
||
|
{
|
||
|
register int cMeasure;
|
||
|
|
||
|
#if 0
|
||
|
/* Safety chck
|
||
|
* IFdef out for speed. This is a internal function
|
||
|
*/
|
||
|
if (lpbWordType == NULL)
|
||
|
return 0;
|
||
|
#endif
|
||
|
|
||
|
/* Initialize the word measure */
|
||
|
cMeasure = 0;
|
||
|
|
||
|
/* Skip the beginning consonants */
|
||
|
|
||
|
for (;wLength > 0 && *lpbWordType == CONSONANT; wLength--, lpbWordType++);
|
||
|
|
||
|
/* Get the vowel/consonant pairs */
|
||
|
while (wLength > 0)
|
||
|
{
|
||
|
|
||
|
/* Get all the vowels */
|
||
|
for (; wLength > 0 && *lpbWordType == VOWEL; wLength--, lpbWordType++);
|
||
|
|
||
|
if (wLength > 0)
|
||
|
{
|
||
|
cMeasure ++;
|
||
|
|
||
|
/* Get all the consonants */
|
||
|
for (; wLength > 0 && *lpbWordType == CONSONANT;
|
||
|
wLength--, lpbWordType++);
|
||
|
}
|
||
|
}
|
||
|
return cMeasure;
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
*
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func int PASCAL NEAR | ConditionMet |
|
||
|
* This fuction check the condition to be met by a particular
|
||
|
* suffix.
|
||
|
*
|
||
|
* @parm LPB | lpbWord |
|
||
|
* Buffer contains the word to be stemmed> This is a 2-byte prefixed
|
||
|
* pascal string
|
||
|
*
|
||
|
* @parm LPB | lpbWordType |
|
||
|
* Buffer containing the type of each letter of the word. This
|
||
|
* is a parallel buffer
|
||
|
*
|
||
|
* @parm LPB | szCondition |
|
||
|
* Condtion in postfix form
|
||
|
*
|
||
|
* @parm int | wLength |
|
||
|
* Length of the word
|
||
|
*
|
||
|
* @rdesc TRUE, if the condition is met, FALSE otherwise
|
||
|
*
|
||
|
*************************************************************************/
|
||
|
|
||
|
int PRIVATE PASCAL NEAR ConditionMet (LPB lpbWord, LPB lpbWordType,
|
||
|
LPB szCondition, int wLength)
|
||
|
{
|
||
|
int StackIndex;
|
||
|
int Stack[4];
|
||
|
int wLengthSaved;
|
||
|
int LastByte;
|
||
|
LPB lpbTmp;
|
||
|
LPB lpbTmpType;
|
||
|
|
||
|
if (szCondition == NULL)
|
||
|
return TRUE;
|
||
|
|
||
|
/* Initialize variables
|
||
|
* Note: The original codes are written for a 1-byte length preceded
|
||
|
* string. The new format is 2-byte preceded string. To minimize the
|
||
|
* change, lpbTmp is used, and points to the 2nd byte
|
||
|
*/
|
||
|
|
||
|
StackIndex = -1;
|
||
|
lpbTmp = lpbWord + 1;
|
||
|
lpbTmpType = lpbWordType + 1;
|
||
|
LastByte = lpbTmp[wLength];
|
||
|
|
||
|
while (*szCondition)
|
||
|
{
|
||
|
switch (*szCondition)
|
||
|
{
|
||
|
case '*': // *S in the document
|
||
|
|
||
|
/* Check to see if the stem ends with the next letter */
|
||
|
|
||
|
Stack[++StackIndex] =
|
||
|
(LastByte == *(++szCondition));
|
||
|
break;
|
||
|
|
||
|
case 'd': // *d in the document
|
||
|
|
||
|
/* Check to see if the stem ends with a double consonant */
|
||
|
|
||
|
Stack[++StackIndex] = (wLength > 2 &&
|
||
|
LastByte == lpbTmp[wLength - 1] &&
|
||
|
lpbTmpType[wLength] == CONSONANT);
|
||
|
break;
|
||
|
|
||
|
case 's': // Remove the last consonant
|
||
|
if (Stack[0])
|
||
|
{
|
||
|
lpbTmp[wLength] = 0;
|
||
|
wLength --;
|
||
|
*(LPW)lpbWordType = *(LPW)lpbWord = (WORD) wLength;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case 'v': // *v* in the document
|
||
|
|
||
|
/* Check to see if the word has a vowel */
|
||
|
|
||
|
wLengthSaved = wLength; /* Save the length */
|
||
|
|
||
|
for (; wLength &&
|
||
|
lpbTmpType[wLength] != VOWEL; wLength--);
|
||
|
|
||
|
Stack[++StackIndex] = wLength > 0;
|
||
|
|
||
|
/* Restore the word length */
|
||
|
wLength = wLengthSaved;
|
||
|
break;
|
||
|
|
||
|
case 'o':
|
||
|
/* *o in the document, ie.
|
||
|
- The word ends with the form cvc
|
||
|
- The second c is not W, X, Y
|
||
|
The +2 is for skipping the word length
|
||
|
*/
|
||
|
Stack[++StackIndex] = (wLength >= 3) &&
|
||
|
(lpbWordType[wLength + 1] == CONSONANT) &&
|
||
|
(lpbWordType[wLength] == VOWEL) &&
|
||
|
(lpbWordType[wLength - 1] == CONSONANT) &&
|
||
|
(LastByte != 'w' && LastByte != 'x' && LastByte != 'y');
|
||
|
break;
|
||
|
|
||
|
/* The conditions below test Measure. If they fails, then
|
||
|
* the whole condition fails. ie. there is no need to test
|
||
|
* any other conditions. There is no need to save the result
|
||
|
* on the stack
|
||
|
*/
|
||
|
|
||
|
case 'p': // Measure > 0
|
||
|
if ((Stack[++StackIndex] =
|
||
|
MeasureCalc (lpbWordType+2, wLength) > 0) == FALSE)
|
||
|
return FALSE;
|
||
|
break;
|
||
|
|
||
|
case '2': // Measure > 1
|
||
|
if ((Stack[++StackIndex] =
|
||
|
MeasureCalc (lpbWordType+2, wLength) > 1) == FALSE)
|
||
|
return FALSE;
|
||
|
break;
|
||
|
|
||
|
case '1': // Measure == 1
|
||
|
if ((Stack[++StackIndex] =
|
||
|
MeasureCalc (lpbWordType+2, wLength) == 1) == FALSE)
|
||
|
return FALSE;
|
||
|
break;
|
||
|
|
||
|
/* The next conditions are operators combination */
|
||
|
|
||
|
case '|':
|
||
|
|
||
|
/* OR the result of the top 2 stack entries */
|
||
|
|
||
|
Stack[StackIndex-1] |= Stack[StackIndex];
|
||
|
StackIndex--;
|
||
|
break;
|
||
|
|
||
|
case '&':
|
||
|
|
||
|
/* AND the result of the top 2 stack entries */
|
||
|
|
||
|
Stack[StackIndex-1] &= Stack[StackIndex];
|
||
|
StackIndex--;
|
||
|
break;
|
||
|
|
||
|
case '!':
|
||
|
|
||
|
/* NOT the result of the top stack entry */
|
||
|
|
||
|
Stack[StackIndex] = !Stack[StackIndex];
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
return FALSE;
|
||
|
}
|
||
|
szCondition++;
|
||
|
}
|
||
|
|
||
|
return Stack[0];
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
*
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func HRESULT PASCAL NEAR | MarkType |
|
||
|
* Marking the type of each letter of the word to be CONSONANT or
|
||
|
* VOWEL
|
||
|
*
|
||
|
* @parm LPB | lpbWord |
|
||
|
* Buffer containing the word
|
||
|
*
|
||
|
* @parm LPB | lpBufType |
|
||
|
* Buffer to contain the type of the letters
|
||
|
*
|
||
|
* @parm int | wLength |
|
||
|
* Length of the word
|
||
|
*
|
||
|
*************************************************************************/
|
||
|
|
||
|
HRESULT PRIVATE PASCAL NEAR MarkType (LPB lpbWord, LPB lpBufType, int wLength)
|
||
|
{
|
||
|
for (; wLength > 0; lpBufType++, lpbWord++, wLength--)
|
||
|
{
|
||
|
|
||
|
/* Consider wildcard characters to be consonnant */
|
||
|
if (*lpbWord == '?' || *lpbWord == '*')
|
||
|
{
|
||
|
*lpBufType = CONSONANT;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (*lpbWord < 'a' || *lpbWord > 'z')
|
||
|
return E_FAIL;
|
||
|
|
||
|
switch (CharTypeTab [*lpbWord - 'a'])
|
||
|
{
|
||
|
case CONSONANT:
|
||
|
*lpBufType = CONSONANT;
|
||
|
break;
|
||
|
case VOWEL:
|
||
|
*lpBufType = VOWEL;
|
||
|
break;
|
||
|
case MIXED:
|
||
|
if (*(lpBufType - 1) == CONSONANT)
|
||
|
*lpBufType = VOWEL;
|
||
|
else
|
||
|
*lpBufType = CONSONANT;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
return S_OK;
|
||
|
}
|