windows-nt/Source/XPSP1/NT/inetsrv/intlwb/kor/bsdict.h
2020-09-26 16:20:57 +08:00

211 lines
5.7 KiB
C++

/////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 1997 - 1998, Microsoft Corporation. All Rights Reserved.
//
// BSDict.h :
//
// Owner : ChaeSeong Lim, HET MSCH RND (e-mail:cslim@microsoft.com)
//
// History : 1996/Mar
/////////////////////////////////////////////////////////////////////////////
#ifndef __DOUBLEBSDICT_H__
#define __DOUBLEBSDICT_H__
#if !defined (_UNICODE) && !defined (_MBCS)
#error _UNICODE or _MBCS is required.
#endif
// Maximun number of length dictionary in the silsa dict. (currently use 9)
#define MAX_LENGTH_DICT 9 // You should check word hash size in hash.h
// Currently using 20 byte long buffer can
// contain 18 byte(9 chars) length word.
#define MAX_BUFFER_SIZE 2048
/////////////////////////////////////////////////////////////////////////////
// _IndexHeader will used as a index Header
struct _IndexHeader {
// 16 bytes
BYTE wordLen;
BYTE reserved;
UINT indexSize, blockSize;
WORD numOfBlocks;
UINT numberOfWords;
_IndexHeader() {
wordLen = 0; indexSize = blockSize = 0; numOfBlocks = 0;
numberOfWords = 0; reserved = 0;
}
_IndexHeader(BYTE _wordLen, UINT _blockSize, UINT _indexSize) {
wordLen = _wordLen;
indexSize = _indexSize;
//content word size(bytes) + pumsa(2) + index(2) + numOfWords(8);
blockSize = _blockSize;
numOfBlocks = 0;
numberOfWords = 0; reserved = 0;
}
};
#define SILSA_DICT_HEADER_SIZE 1024
//#define COPYRIGHT_STR "Copyright (C) 1996 Hangul Engineering Team. Microsoft Korea(MSCH). All rights reserved.\nVer 2.0 1996/3"
struct _DictHeader {
//char COPYRIGHT_HEADER[150];
WORD numOfLenDict;
DWORD iBlock;
_DictHeader() {
numOfLenDict=0; iBlock=0;
//memset(COPYRIGHT_HEADER, '\0', sizeof(COPYRIGHT_HEADER));
//strcpy(COPYRIGHT_HEADER, COPYRIGHT_STR);
//COPYRIGHT_HEADER[strlen(COPYRIGHT_HEADER)+1] = '\032';
//numOfLenDict=0; iBlock=0;
}
};
//#define DICT_HEADER_SIZE 16
//#define INDEX_HEADER_SIZE 20
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// CDoubleBSDict
class CDoubleBSDict {
public:
// Constructor
// m_wordLen denote number of real two byte word. not byte length
CDoubleBSDict() {
m_pIndexHeader = new _IndexHeader;
hIndex = 0;
}
CDoubleBSDict(int wordLen, int blockSize) {
m_pIndexHeader = new _IndexHeader((BYTE)wordLen, blockSize,
(wordLen << 1) + 2 + sizeof(WORD)*2); // index size
hIndex = 0;
}
// Attributes
// Operations
//virtual void Delete(const _TCHAR *key) = 0;
//virtual BOOL Find(const _TCHAR *) = 0;
int GetNumOfBlocks() { return m_pIndexHeader->numOfBlocks; }
int GetBlockSize() { return m_pIndexHeader->blockSize; }
int GetIndexSize() { return m_pIndexHeader->indexSize; }
int GetWordLen() { return m_pIndexHeader->wordLen; }
int GetWordByteLen() { return ((m_pIndexHeader->wordLen)<<1); }
// Implementations
protected:
_IndexHeader *m_pIndexHeader;
HGLOBAL hIndex;
public:
// Destructor
~CDoubleBSDict() {
if (m_pIndexHeader) delete m_pIndexHeader;
if (hIndex) GlobalFree(hIndex);
}
};
/////////////////////////////////////////////////////////////////////////////
// CDoubleMemDict class
class CDoubleMemBSDict : public CDoubleBSDict {
public:
// Constructor
CDoubleMemBSDict() { hBlocks = 0; }
CDoubleMemBSDict(int wordSize, int blockSize)
: CDoubleBSDict(wordSize, blockSize) { hBlocks = 0; }
// Attributes
// Operations
void BuildFromTextFile(LPCTSTR lpfilename);
DWORD WriteIndex(HANDLE hOut);
DWORD WriteBlocks(HANDLE hOut);
//void Delete(const _TCHAR *key);
// Implementations
protected:
HGLOBAL hBlocks;
HANDLE hInput;
UINT m_maxWordsInBlock;
int m_readPerOnce;
int ReadWord(BYTE *contentWord, int *pumsa);
void ReadBlock(int blockNumber, int *readNum, int *readUniQue);
BOOL AllocIndexNBlock();
private:
public:
// Destructor
~CDoubleMemBSDict();
};
class BlockCache;
/////////////////////////////////////////////////////////////////////////////
// CDoubleFileBSDict class
class CDoubleFileBSDict : public CDoubleBSDict {
public:
// Constructor
CDoubleFileBSDict() : CDoubleBSDict() { }
CDoubleFileBSDict(int wordSize, int blockSize)
: CDoubleBSDict(wordSize, blockSize) { }
// Attributes
// Operations
void LoadIndex(HANDLE hInput);
int FindWord(HANDLE hDict, DWORD fpBlock, LPCTSTR lpWord);
// Implementations
protected:
void LoadIndexHeader(HANDLE hInput);
int FindIndex(LPCTSTR lpWord, int left, int right, BYTE *pumsa);
int FindBlock(LPCTSTR lpWord, int left, int right);
int Comp(LPCTSTR lpMiddle, LPCTSTR lpWord);
BYTE *lpIndex;
private:
static BYTE lpBuffer[MAX_BUFFER_SIZE];
static BYTE *lpCurIndex;
public:
// Destructor
~CDoubleFileBSDict() { }
};
/////////////////////////////////////////////////////////////////////////////
// CDoubleFileBSDict class inline fuction
inline
int CDoubleFileBSDict::Comp(LPCTSTR lpMiddle, LPCTSTR lpWord )
{
#ifdef _MBCS
for (int i=0; i<GetWordByteLen(); i++) {
#elif _UNICODE
for (int i=0; i<GetWordLen(); i++) {
#endif
int test = *(lpMiddle+i) - *(lpWord+i);
if (test<0) return -1;
else
if (test>0) return 1;
}
return 0;
}
#endif // !__DOUBLEBSDICT_H__