//+------------------------------------------------------------------------- // // Microsoft Windows // Copyright (C) Microsoft Corporation, 1991-1998. // // File: FA.hxx // // Contents: Non-deterministic finite automata // // Classes: CFA // CNFA // CDFA // // History: 20-Jan-92 KyleP Created // 19-Jun-92 KyleP Cleanup // //-------------------------------------------------------------------------- #pragma once #include #include #include #include #include class CInternalPropertyRestriction; WCHAR const wcAnySingle = '?'; WCHAR const wcAnyMultiple = '*'; WCHAR const wcDOSDot = '.'; WCHAR const wcRepeatZero = '*'; WCHAR const wcRepeatOne = '+'; WCHAR const wcRepeatZeroOrOne = '?'; WCHAR const wcBeginRange = '['; WCHAR const wcEndRange = ']'; WCHAR const wcInvertRange = '^'; WCHAR const wcRangeSep = '-'; WCHAR const wcEscape = '|'; WCHAR const wcOr = ','; WCHAR const wcBeginParen = '('; WCHAR const wcEndParen = ')'; WCHAR const wcBeginRepeat = '{'; WCHAR const wcEndRepeat = '}'; WCHAR const wcNextRepeat = ','; WCHAR const wcLastValidChar = 0xFFFF; // // Note that these are the 'top level' special characters. // Characters *on or after* these characters may have special meaning. // WCHAR const awcSpecialRegex[] = L"?*.|"; char const acSpecialRegex[] = "?*.|"; WCHAR const awcSpecialRegexReverse[] = L"?*.|+]),}"; //+------------------------------------------------------------------------- // // Class: CFA // // Purpose: Base class for finite automata. // // History: 20-Jan-92 KyleP Created // //-------------------------------------------------------------------------- class CFA { protected: inline CFA(); CFA( CFA const & src ); ~CFA(); void Add( CFAState * pState ); CFAState * Get( unsigned iState ); inline unsigned Count(); private: unsigned _cTotal; CFAState ** _ppState; }; //+------------------------------------------------------------------------- // // Class: CNFA // // Purpose: Non-deterministic finite automata. // // History: 20-Jan-92 Kylep Created // //-------------------------------------------------------------------------- class CNFA { public: CNFA( WCHAR const * pwcs, BOOLEAN fCaseSens ); CNFA( CNFA const & src ); ~CNFA(); inline unsigned StartState(); void EpsClosure( unsigned StateNum, CStateSet & ssOut ); void EpsClosure( CStateSet & ssIn, CStateSet & ssOut ); void Move( CStateSet & ssIn, CStateSet & ssOut, unsigned symbol = symEpsilon ); BOOLEAN IsFinal( CStateSet & ss ); inline CXlatChar const & Translate() const; inline unsigned NumStates() const; private: inline CNFAState * Get( unsigned iState ); void Parse( WCHAR const * wcs, unsigned * iStart, unsigned * iEnd, WCHAR const * * pwcsEnd = 0, WCHAR wcHalt = 0 ); void ParseRepeat( WCHAR const * & wcs, unsigned & cRepeat1, unsigned & cRepeat2 ); void FindCharClasses( WCHAR const * wcs ); void Replicate( unsigned iStart, unsigned iEnd, unsigned * piNewStart, unsigned * piNewEnd ); unsigned _iStart; // Start state unsigned _iNextState; static WCHAR * _wcsNull; CXlatChar _chars; // Wide character translator XArray _aState; // State array. #if (CIDBG == 1) public: // // Debug methods. // void Display(); #endif // (CIDBG == 1) }; //+------------------------------------------------------------------------- // // Class: CDFA // // Purpose: Deterministic finite automata. // // History: 20-Jan-92 Kylep Created // //-------------------------------------------------------------------------- class CDFA : public CFA { public: CDFA( WCHAR const * pwcs, CTimeLimit & timeLimit, BOOLEAN fCaseSens ); CDFA( CDFA const & CDFA ); ~CDFA(); BOOLEAN Recognize( WCHAR const * wcs ); private: void CommonCtor( ); inline BOOLEAN IsFinal( unsigned state ); inline unsigned Move( unsigned state, unsigned sym ) const; inline void AddTransition( unsigned state, unsigned sym, unsigned newstate ); inline BOOLEAN IsComputed( unsigned state ); void Add( unsigned state, BOOLEAN fFinal ); void Realloc(); # if CIDBG == 1 void ValidateStateTransitions(); # endif // CIDBG == 1 CNFA _nfa; // This must be the first member variable. CXlatState _xs; // Translate NFA state set to DFA state. unsigned _stateStart; // Starting DFA state. unsigned _cState; // Number of states XArray _xStateTrans; // Array of state transitions. XArray _xStateFinal; // _xStateFinal[i] TRUE if i is final state. CReadWriteAccess _rwa; // Locking. CTimeLimit & _timeLimit; // Execution time limit }; //+------------------------------------------------------------------------- // // Class: CRegXpr (regx) // // Purpose: Performs regular expression matches on properties // // History: 15-Apr-92 KyleP Created // //-------------------------------------------------------------------------- class CRegXpr : public CXpr { public: CRegXpr( CInternalPropertyRestriction * prst, CTimeLimit& timeLimit ); CRegXpr( CRegXpr const & regxpr ); virtual ~CRegXpr() {}; virtual CXpr * Clone(); virtual void SelectIndexing( CIndexStrategy & strategy ); virtual BOOL IsMatch( CRetriever & obj ); private: CXprPropertyValue _pxpval; // Retrieves value from database XPtr _xrstContentHelper; // Use content indexing CStorageVariant _varPrefix; // Fixed prefix (for value indexing) CDFA _dfa; // Finite automata engine ULONG _ulCodePage; // Code page of system }; //+------------------------------------------------------------------------- // // Member: CFA::CFA, protected // // Synopsis: Intializes a generic finite automata. // // History: 20-Jan-92 KyleP Created // //-------------------------------------------------------------------------- inline CFA::CFA() : _cTotal( 0 ), _ppState( 0 ) { } //+------------------------------------------------------------------------- // // Member: CFA::Count, protected // // Synopsis: Returns the count of states. // // History: 20-Jan-92 KyleP Created // //-------------------------------------------------------------------------- inline unsigned CFA::Count() { return( _cTotal ); } //+------------------------------------------------------------------------- // // Member: CNFA::Get, private // // Arguments: [iState] -- Index of state. // // Returns: The appropriate state. // // History: 20-Jan-92 Kylep Created // //-------------------------------------------------------------------------- inline CNFAState * CNFA::Get( unsigned iState ) { if ( iState > _aState.Count() ) { unsigned cNewState = iState + 10; XArray xState( cNewState ); for ( unsigned i = 0; i < _aState.Count(); i++ ) xState[i].Init( _aState[i] ); for ( ; i < cNewState; i++ ) xState[i].Init(i+1); _aState.Free(); _aState.Set( cNewState, xState.Acquire() ); } return &_aState[ iState - 1 ]; } //+------------------------------------------------------------------------- // // Member: CNFA::StartState, private // // Returns: The start state. // // History: 20-Jan-92 Kylep Created // //-------------------------------------------------------------------------- inline unsigned CNFA::StartState() { return( _iStart ); } //+------------------------------------------------------------------------- // // Member: CNFA::Translate, private // // Returns: The character translator. // // History: 20-Jan-92 Kylep Created // //-------------------------------------------------------------------------- inline CXlatChar const & CNFA::Translate() const { return( _chars ); } //+------------------------------------------------------------------------- // // Member: CNFA::NumStates, public // // Returns: The count of states currently in the automata. // // History: 20-Jan-92 Kylep Created // //-------------------------------------------------------------------------- inline unsigned CNFA::NumStates() const { return( _iNextState ); } //+------------------------------------------------------------------------- // // Member: CDFA::IsFinal, public // // Arguments: [state] -- Index of state. // // Returns: TRUE if state [state] is final. // // History: 20-Jan-92 Kylep Created // //-------------------------------------------------------------------------- inline BOOLEAN CDFA::IsFinal( unsigned state ) { return( _xStateFinal[ state ] ); } //+------------------------------------------------------------------------- // // Member: CDFA::Move, public // // Arguments: [state] -- Index of state. // [sym] -- Input symbol // // Returns: The new state reached from state [state] on an input // symbol [sym]. // // History: 20-Jan-92 Kylep Created // // Notes: If this function is ever changed to modify data, then // you need to also investigate the locking in CDFA::Recognize. // //-------------------------------------------------------------------------- inline unsigned CDFA::Move( unsigned state, unsigned sym ) const { return( _xStateTrans[state * (_nfa.Translate().NumClasses() + 1) + sym] ); } //+------------------------------------------------------------------------- // // Member: CDFA::AddTransition, private // // Effects: Adds a transtion from state [state] on input symbol [sym] // to state [newstate]. // // Arguments: [state] -- Index of state. // [sym] -- Input symbol. // [newstate] -- Index of state // // History: 20-Jan-92 Kylep Created // //-------------------------------------------------------------------------- inline void CDFA::AddTransition( unsigned state, unsigned sym, unsigned newstate ) { _xStateTrans[ state * ( _nfa.Translate().NumClasses() + 1 ) + sym ] = newstate; } //+------------------------------------------------------------------------- // // Member: CDFA::IsComputed, private // // Arguments: [state] -- Index of state. // // Returns: TRUE if the DFA contains a transition mapping for state // [state]. // // History: 20-Jan-92 Kylep Created // // Notes: An uncomputed state is one for which IsFinal has not been // computed. All transitions other transitions are // automatically set to stateUncomputed at allocation time. // //-------------------------------------------------------------------------- inline BOOLEAN CDFA::IsComputed( unsigned state ) { return ( state <= _cState && Move( state, 0 ) != stateUndefined ); }