// ========================================================================= // Copyright (C) 1997 - 1998, Microsoft Corporation. All Rights Reserved. // // FILE NAME : BASESUB.CPP // Function : BASE ENGINE FUNCTION COLLECTION // : NLP Base Engine Function // ========================================================================= #include "basesub.hpp" #include "basegbl.hpp" #include "stemkor.h" #include "MainDict.h" // ------------------------------------------------------------------------ // // // ------------------------------------------------------------------------ int NLP_Ge_Proc( char *stem ) { for (int i = 0; i < 3; i++) if(strcmp(stem, TempNoun[i]) == 0) return PRON_VALID; return BT; } // ------------------------------------------------------------------------ // // // ------------------------------------------------------------------------ int BaseEngine::NLP_Get_Ending( char *incode, char *Act, int *sp, int Endflag) { char ending[40]; BYTE action; int res, j = 1, codelen = lstrlen(incode) - 1; memset(ending, NULL, 40); sp[0] = -1; if(Endflag == 1) Act[0] = (unsigned char)0xf8; // if there is no tossi : action code 1111-1000 else Act[0] = 0x74; // if there is no endin : action code 0111-0100 for (int i = 0; i <= codelen; i++) { ending[i] = incode[codelen-i]; ending[i+1] = NULLCHAR; if(Endflag == 1) res = FindHeosaWord(ending, _TOSSI, &action); else res = FindHeosaWord(ending, _ENDING, &action); switch (res) { case FINAL : case FINAL_MORE : Act[j] = action; sp[j++] = i; // LMEPOS continue; case FALSE_MORE : continue; case NOT_FOUND : break; } break; } if (Endflag == 1 && sp [0] == 1) { sp [0] = 1; sp [1] = -1; Act [0] = Act [1]; Act [1] = (unsigned char)0xf8; } Act[j] = NULL; sp[j] = NULL; return j; } // ------------------------------------------------------------------------ // // // ------------------------------------------------------------------------ int BaseEngine::NLP_Num_Proc( char *stem) { char t_stem[80]; int t_ulspos; if(ULSPOS == -1) return BT; memset(t_stem, NULL, 80); lstrcpy(t_stem, stem); t_ulspos = lstrlen(t_stem)-1; int n = NumNoun.FindWord(t_stem, t_ulspos); if(n != -1) { if(NLP_CheckSuja(t_stem, t_ulspos) == VALID) return NUM_VALID; else return BT; } if(FindIrrWord(t_stem, _ZZNUM) & FINAL ) return NUM_VALID; if(NLP_CheckSuja(t_stem, t_ulspos) == VALID) return NUM_VALID; return BT; } // ---------------------------------------------------------------------- // // // ---------------------------------------------------------------------- int BaseEngine::NLP_CheckSuja( char *stem, int ulspos) { enum STATE {_BASE, _NUM} currentstate; currentstate = _BASE; enum OPERATION {_START, _NOSTART} currentphase; currentphase = _START; char currentbase = -1; char tempbase = -1; JumpNum.FindWord(stem, ulspos); for ( ; ulspos >= 0; ) { switch (currentstate) { case _BASE : tempbase = (char)BaseNum.FindWord(stem, ulspos,currentbase+1); if(tempbase != -1) { currentstate = _BASE; currentbase = tempbase; if(currentphase == _START) { for (int i = 0; i < 8; i++) if(strcmp(stem,DoubleNum[i]) == 0) return VALID; currentphase = _NOSTART; } break; } if(currentphase == _START) { for (int i = 0; i < 8; i++) if(strcmp(stem, DoubleNum[i]) == 0) return VALID; currentphase = _NOSTART; break; } if(SujaNum.FindWord(stem, ulspos) != -1) { currentstate = _NUM; break; } return INVALID; case _NUM : tempbase = (char)BaseNum.FindWord(stem, ulspos, currentbase+1); if(tempbase != -1) { currentstate = _BASE; currentbase = tempbase; break; } return INVALID; } } return VALID; } // ---------------------------------------------------------------------- // // // ---------------------------------------------------------------------- int BaseEngine::NLP_NCV_Proc( char *stem, char *ending) { int lULS; lULS = lstrlen(stem) - 1; if(ACT_C == 1 && ACT_V == 1) return NCV_VALID; if(ACT_C == 0 && ACT_V == 1) { if(stem[lULS] >= __V_k) return NCV_VALID; if(LME == __K_R && ending[LMEPOS-1] == __V_h && __IsDefEnd(LMEPOS, 1) == 1) if(stem[lULS] == __K_R) return NCV_VALID; return BT; } if(stem[lULS] >= __V_k) return BT; if(stem[lULS] == __K_R && __IsDefEnd(LMEPOS, 3) == 1 && ending[LMEPOS] == __K_I && ending[LMEPOS-1] == __V_m && ending[LMEPOS-2] == __K_R && ending[LMEPOS-3] == __V_h) return BT; return NCV_VALID; } // ---------------------------------------------------------------------- // // To process affix // // ---------------------------------------------------------------------- int BaseEngine::NLP_Fix_Proc(char *stem, char *ending) { char prestem[80], bufstem[80], suffix [80], prefix [80], index[1]; int ulspos, temp; prefix [0] = '\0'; suffix [0] = '\0'; lstrcpy(prestem, stem); ulspos = ULSPOS; if(__IsDefStem(ULSPOS, 2) == 1 && prestem[ULSPOS-2] == __K_D && prestem[ULSPOS-1] == __V_m && prestem[ULSPOS] == __K_R) { if(lstrlen(ending) == 0 || ACT_P_A == 1) // sp == 0 || ACT_P_A == 1 { if(FindIrrWord(stem, _ZPN) & FINAL) { int len = lstrlen (stem); memcpy (suffix, stem+len-3, 4); stem [len-3] = '\0'; char tstem [80]; Conv.INS2HAN(stem, tstem, codeWanSeong); lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_PRONOUN; lstrcat (lrgsz, "+"); Conv.INS2HAN(suffix, tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_SUFFIX; return Deol_VALID; } } temp = ulspos; __DelStemN(prestem, &temp, 3); ulspos = temp; index[0] = 'm'; char tstem [80]; Conv.INS2HAN (prestem, tstem, codeWanSeong); if (FindSilsaWord (tstem) & _NOUN) { int len = lstrlen (stem); memcpy (suffix, stem+len-3, 4); lstrcpy (stem, prestem); lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_NOUN; lstrcat (lrgsz, "+"); Conv.INS2HAN(suffix, tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_SUFFIX; return Deol_VALID; } return MORECHECK; } if(PrefixCheck(prestem, bufstem) != -1) { index[0] = 'm'; char tstem [80]; Conv.INS2HAN (bufstem, tstem, codeWanSeong); if (FindSilsaWord (tstem) & _NOUN) { int len = lstrlen(stem) - lstrlen(bufstem); memcpy (prefix, stem, len); prefix [len] = '\0'; lstrcpy (stem, bufstem); Conv.INS2HAN(prefix, tstem, codeWanSeong); lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_PREFIX; lstrcat (lrgsz, "+"); Conv.INS2HAN(stem, tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_NOUN; return Pref_VALID; } } if(Suffix.FindWord(prestem, ulspos) != -1) { index[0] = 'm'; char tstem [80]; Conv.INS2HAN (prestem, tstem, codeWanSeong); if (FindSilsaWord (tstem) & _NOUN) { lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_NOUN; lstrcat (lrgsz, "+"); Conv.INS2HAN(stem+lstrlen(prestem), tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_SUFFIX; return Suf_VALID; } } lstrcpy(prestem, stem); ulspos = ULSPOS; if(Suffix.FindWord(prestem, ulspos) != -1 && PrefixCheck(prestem, bufstem) != -1) { index[0] = 'm'; char tstem [80]; Conv.INS2HAN (bufstem, tstem, codeWanSeong); if (FindSilsaWord (tstem) & _NOUN) { prestem [lstrlen(prestem) - lstrlen(bufstem)] = 0; Conv.INS2HAN(prestem, tstem, codeWanSeong); lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_PREFIX; lstrcat (lrgsz, "+"); Conv.INS2HAN(bufstem, tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_NOUN; lstrcat (lrgsz, "+"); Conv.INS2HAN(stem + lstrlen (prestem) + lstrlen (bufstem), tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_SUFFIX; return PreSuf_VALID; } } return MORECHECK; } int BaseEngine::NLP_Find_Pronoun(char *stem, char *ending) { if(FindIrrWord(stem, _ZPN) & FINAL) { if ((ending [0] == __V_k && ending [1] == __K_G) || (ending [0] == __V_p && ending [1] == __K_G)) { if ((stem [0] == __K_N && stem [1] == __V_j) || (stem [0] == __K_N && stem [1] == __V_k) || (stem [0] == __K_J && stem [1] == __V_j)) { return MORECHECK; } else if (stem [0] == __K_N && stem [1] == __V_o) { stem [1] = __V_k; } else if (stem [0] == __K_N && stem [1] == __V_p) { stem [1] = __V_j; } else if (stem [0] == __K_J && stem [1] == __V_p) { stem [1] = __V_j; } } else { int len = lstrlen (stem) - 1; if (len > 4 && stem [len] == __K_D && stem [len - 1] == __V_m && stem [len - 2] == __K_R) stem [len-2] = '\0'; } char tstem [80]; Conv.INS2HAN (stem, tstem, codeWanSeong); lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_PRONOUN; return VALID; } return MORECHECK; } // ------------------------------------------------------------------ // // // ------------------------------------------------------------------ int PrefixCheck(char *stem, char *prestem) { int i, j, l, PreLen, WordLen; char buf1[5], buf2[5]; i = 0; PreLen = 9; WordLen = 5; while (i < PreLen) { j = TempPrefix[(i*WordLen)+4]; memset(buf1, NULL, 5); for (l = 0; l <= j; l++) buf1[l] = TempPrefix[(i*WordLen)+l]; memset(buf2, NULL, 5); for (l = 0; l <= j; l++) buf2[l] = stem[l]; if(strcmp(buf1, buf2) == 0) { //found j = 0; memset(prestem, NULL, 80); while (stem[l] != 0x00) prestem[j++] = stem[l++]; return 1; } i++; } return -1; } void SetSilHeosa (int ivalue, WORD *rvalue) { switch (ivalue&0x0f00) { case POS_NOUN : ivalue |= wtSilsa; break; case POS_VERB : ivalue |= wtSilsa; break; case POS_SUFFIX : if ((ivalue&0x00ff) == DEOL_SUFFIX) ivalue |= wtHeosa; else ivalue |= wtSilsa; break; case POS_PREFIX : ivalue |= wtSilsa; break; case POS_ADJECTIVE : ivalue |= wtSilsa; break; case POS_PRONOUN : ivalue |= wtSilsa; break; case POS_NUMBER : ivalue |= wtSilsa; break; case POS_AUXADJ : ivalue |= wtHeosa; break; case POS_AUXVERB : ivalue |= wtHeosa; break; case POS_OTHERS : ivalue |= wtHeosa; break; case POS_TOSSI : ivalue |= wtHeosa; break; case POS_ENDING : ivalue |= wtHeosa; break; case POS_SPECIFIER : ivalue |= wtHeosa; break; } *rvalue = (WORD)ivalue; }