//+------------------------------------------------------------------------- // // Microsoft Windows // Copyright (C) Microsoft Corporation, 1992-1998. // // File: XlatChar.hxx // // Contents: Character translation class. // // Classes: CXlatChar // // History: 01-20-92 KyleP Created // //-------------------------------------------------------------------------- #pragma once #ifdef DISPLAY_INCLUDES #pragma message( "#include <" __FILE__ ">..." ) #endif // // Special character equivalence classes. I suppose in theory these // could doubly map to character classes in the regex but nearly // every unicode symbol must be treated uniquely somewhere in the // regex for that to happen. // UINT const symAny = 1; // Any single character (except BEG/END LINE) UINT const symBeginLine = 2; // Special character indicating beginning // of line. UINT const symEndLine = 3; // Special character indicating end of line. UINT const symInvalid = 4; // Guaranteed invalid UINT const symEpsilon = 5; // Epsilon move. UINT const symDot = 6; // '.' Don't ask. It's for DOS compliance UINT const cSpecialCharClasses = symDot; // 'normal' character classes // start here. //+------------------------------------------------------------------------- // // Class: CXlatChar // // Purpose: Maps UniCode characters to equivalence class(es). // // History: 20-Jan-92 KyleP Created // // Notes: Equivalence classes must consist of sequential characters. // The implementation for this class is a sorted array of // characters. Each character marks the end of a range. The // class for a given character is found by binary searching // the array until you end up in the appropriate range. // //-------------------------------------------------------------------------- class CXlatChar { public: CXlatChar( BOOLEAN fCaseSens ); CXlatChar( CXlatChar const & src ); inline ~CXlatChar(); void AddRange( WCHAR wcStart, WCHAR wcEnd ); UINT Translate( WCHAR wc ) const; UINT TranslateRange( WCHAR wcStart, WCHAR wcEnd ); inline UINT NumClasses() const; void Prepare(); private: UINT _Search( WCHAR wc ); void _Realloc(); WCHAR * _pwcRangeEnd; // Character in position i is the end // of the ith range. UINT _cRange; UINT _cAllocation; UINT _iPrevRange; BOOLEAN _fCaseSens; // TRUE if case sensitive mapping. #if (CIDBG == 1) BOOLEAN _fPrepared; public: // // Debug methods // void Display() const; #endif }; //+------------------------------------------------------------------------- // // Member: CXlatChar::NumClasses, public // // Returns: The number of different equivalence classes. // // History: 20-Jan-92 KyleP Created // //-------------------------------------------------------------------------- inline UINT CXlatChar::NumClasses() const { return( _cRange + cSpecialCharClasses ); } //+------------------------------------------------------------------------- // // Member: CXlatChar::~CXlatChar, public // // Synopsis: Destroys class. // // History: 20-Jan-92 KyleP Created // //-------------------------------------------------------------------------- inline CXlatChar::~CXlatChar() { delete _pwcRangeEnd; }