457 lines
11 KiB
C++
457 lines
11 KiB
C++
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Microsoft Windows
|
||
|
// Copyright (C) Microsoft Corporation, 1991-1998.
|
||
|
//
|
||
|
// File: FA.hxx
|
||
|
//
|
||
|
// Contents: Non-deterministic finite automata
|
||
|
//
|
||
|
// Classes: CFA
|
||
|
// CNFA
|
||
|
// CDFA
|
||
|
//
|
||
|
// History: 20-Jan-92 KyleP Created
|
||
|
// 19-Jun-92 KyleP Cleanup
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
#pragma once
|
||
|
|
||
|
#include <xpr.hxx>
|
||
|
#include <state.hxx>
|
||
|
#include <xlatstat.hxx>
|
||
|
#include <xlatchar.hxx>
|
||
|
#include <timlimit.hxx>
|
||
|
|
||
|
class CInternalPropertyRestriction;
|
||
|
|
||
|
//
// Pattern metacharacters. The groupings below follow the constant names;
// the code that interprets them is not part of this header.
//

// Wildcards.
const WCHAR wcAnySingle        = L'?';
const WCHAR wcAnyMultiple      = L'*';

const WCHAR wcDOSDot           = L'.';

// Repetition operators. Note that '*' and '?' are the same characters
// as wcAnyMultiple and wcAnySingle above.
const WCHAR wcRepeatZero       = L'*';
const WCHAR wcRepeatOne        = L'+';
const WCHAR wcRepeatZeroOrOne  = L'?';

// Character ranges.
const WCHAR wcBeginRange       = L'[';
const WCHAR wcEndRange         = L']';
const WCHAR wcInvertRange      = L'^';
const WCHAR wcRangeSep         = L'-';

const WCHAR wcEscape           = L'|';

const WCHAR wcOr               = L',';

// Grouping.
const WCHAR wcBeginParen       = L'(';
const WCHAR wcEndParen         = L')';

// Explicit repeat counts. wcNextRepeat is the same character as wcOr.
const WCHAR wcBeginRepeat      = L'{';
const WCHAR wcEndRepeat        = L'}';
const WCHAR wcNextRepeat       = L',';

const WCHAR wcLastValidChar    = 0xFFFF;

//
// Note that these are the 'top level' special characters.
// Characters *on or after* these characters may have special meaning.
//

const WCHAR awcSpecialRegex[]        = L"?*.|";
const char  acSpecialRegex[]         = "?*.|";
const WCHAR awcSpecialRegexReverse[] = L"?*.|+]),}";
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Class: CFA
|
||
|
//
|
||
|
// Purpose: Base class for finite automata.
|
||
|
//
|
||
|
// History: 20-Jan-92 KyleP Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
class CFA
|
||
|
{
|
||
|
protected:
|
||
|
|
||
|
inline CFA();
|
||
|
|
||
|
CFA( CFA const & src );
|
||
|
|
||
|
~CFA();
|
||
|
|
||
|
void Add( CFAState * pState );
|
||
|
|
||
|
CFAState * Get( unsigned iState );
|
||
|
|
||
|
inline unsigned Count();
|
||
|
|
||
|
private:
|
||
|
|
||
|
unsigned _cTotal;
|
||
|
CFAState ** _ppState;
|
||
|
};
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Class: CNFA
|
||
|
//
|
||
|
// Purpose: Non-deterministic finite automata.
|
||
|
//
|
||
|
// History: 20-Jan-92 Kylep Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
class CNFA
|
||
|
{
|
||
|
public:
|
||
|
|
||
|
CNFA( WCHAR const * pwcs, BOOLEAN fCaseSens );
|
||
|
|
||
|
CNFA( CNFA const & src );
|
||
|
|
||
|
~CNFA();
|
||
|
|
||
|
inline unsigned StartState();
|
||
|
|
||
|
void EpsClosure( unsigned StateNum, CStateSet & ssOut );
|
||
|
|
||
|
void EpsClosure( CStateSet & ssIn, CStateSet & ssOut );
|
||
|
|
||
|
void Move( CStateSet & ssIn, CStateSet & ssOut, unsigned symbol = symEpsilon );
|
||
|
|
||
|
BOOLEAN IsFinal( CStateSet & ss );
|
||
|
|
||
|
inline CXlatChar const & Translate() const;
|
||
|
|
||
|
inline unsigned NumStates() const;
|
||
|
|
||
|
private:
|
||
|
|
||
|
inline CNFAState * Get( unsigned iState );
|
||
|
|
||
|
void Parse( WCHAR const * wcs,
|
||
|
unsigned * iStart,
|
||
|
unsigned * iEnd,
|
||
|
WCHAR const * * pwcsEnd = 0,
|
||
|
WCHAR wcHalt = 0 );
|
||
|
|
||
|
void ParseRepeat( WCHAR const * & wcs,
|
||
|
unsigned & cRepeat1,
|
||
|
unsigned & cRepeat2 );
|
||
|
|
||
|
void FindCharClasses( WCHAR const * wcs );
|
||
|
|
||
|
void Replicate( unsigned iStart,
|
||
|
unsigned iEnd,
|
||
|
unsigned * piNewStart,
|
||
|
unsigned * piNewEnd );
|
||
|
|
||
|
unsigned _iStart; // Start state
|
||
|
unsigned _iNextState;
|
||
|
|
||
|
static WCHAR * _wcsNull;
|
||
|
|
||
|
CXlatChar _chars; // Wide character translator
|
||
|
|
||
|
XArray<CNFAState> _aState; // State array.
|
||
|
|
||
|
#if (CIDBG == 1)
|
||
|
|
||
|
public:
|
||
|
|
||
|
//
|
||
|
// Debug methods.
|
||
|
//
|
||
|
|
||
|
void Display();
|
||
|
|
||
|
#endif // (CIDBG == 1)
|
||
|
};
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Class: CDFA
|
||
|
//
|
||
|
// Purpose: Deterministic finite automata.
|
||
|
//
|
||
|
// History: 20-Jan-92 Kylep Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
class CDFA : public CFA
|
||
|
{
|
||
|
public:
|
||
|
|
||
|
CDFA( WCHAR const * pwcs, CTimeLimit & timeLimit, BOOLEAN fCaseSens );
|
||
|
|
||
|
CDFA( CDFA const & CDFA );
|
||
|
|
||
|
~CDFA();
|
||
|
|
||
|
BOOLEAN Recognize( WCHAR const * wcs );
|
||
|
|
||
|
private:
|
||
|
|
||
|
void CommonCtor( );
|
||
|
|
||
|
inline BOOLEAN IsFinal( unsigned state );
|
||
|
|
||
|
inline unsigned Move( unsigned state, unsigned sym ) const;
|
||
|
|
||
|
inline void AddTransition( unsigned state, unsigned sym, unsigned newstate );
|
||
|
|
||
|
inline BOOLEAN IsComputed( unsigned state );
|
||
|
|
||
|
void Add( unsigned state, BOOLEAN fFinal );
|
||
|
|
||
|
void Realloc();
|
||
|
|
||
|
# if CIDBG == 1
|
||
|
void ValidateStateTransitions();
|
||
|
# endif // CIDBG == 1
|
||
|
|
||
|
CNFA _nfa; // This must be the first member variable.
|
||
|
|
||
|
CXlatState _xs; // Translate NFA state set to DFA state.
|
||
|
unsigned _stateStart; // Starting DFA state.
|
||
|
|
||
|
unsigned _cState; // Number of states
|
||
|
XArray<unsigned> _xStateTrans; // Array of state transitions.
|
||
|
XArray<BOOLEAN> _xStateFinal; // _xStateFinal[i] TRUE if i is final state.
|
||
|
|
||
|
CReadWriteAccess _rwa; // Locking.
|
||
|
CTimeLimit & _timeLimit; // Execution time limit
|
||
|
|
||
|
};
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Class: CRegXpr (regx)
|
||
|
//
|
||
|
// Purpose: Performs regular expression matches on properties
|
||
|
//
|
||
|
// History: 15-Apr-92 KyleP Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
class CRegXpr : public CXpr
|
||
|
{
|
||
|
public:
|
||
|
|
||
|
CRegXpr( CInternalPropertyRestriction * prst, CTimeLimit& timeLimit );
|
||
|
|
||
|
CRegXpr( CRegXpr const & regxpr );
|
||
|
|
||
|
virtual ~CRegXpr() {};
|
||
|
|
||
|
virtual CXpr * Clone();
|
||
|
|
||
|
virtual void SelectIndexing( CIndexStrategy & strategy );
|
||
|
|
||
|
virtual BOOL IsMatch( CRetriever & obj );
|
||
|
|
||
|
private:
|
||
|
|
||
|
CXprPropertyValue _pxpval; // Retrieves value from database
|
||
|
XPtr<CRestriction> _xrstContentHelper; // Use content indexing
|
||
|
CStorageVariant _varPrefix; // Fixed prefix (for value indexing)
|
||
|
CDFA _dfa; // Finite automata engine
|
||
|
ULONG _ulCodePage; // Code page of system
|
||
|
};
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CFA::CFA, protected
|
||
|
//
|
||
|
// Synopsis: Initializes a generic finite automaton.
|
||
|
//
|
||
|
// History: 20-Jan-92 KyleP Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
inline CFA::CFA()
        : _cTotal( 0 ), _ppState( 0 )
{
    // Nothing else to do: the count starts at zero and the state
    // pointer starts out null.
}
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CFA::Count, protected
|
||
|
//
|
||
|
// Synopsis: Returns the count of states.
|
||
|
//
|
||
|
// History: 20-Jan-92 KyleP Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
inline unsigned CFA::Count()
|
||
|
{
|
||
|
return( _cTotal );
|
||
|
}
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CNFA::Get, private
|
||
|
//
|
||
|
// Arguments: [iState] -- Index of state.
|
||
|
//
|
||
|
// Returns: The appropriate state.
|
||
|
//
|
||
|
// History: 20-Jan-92 Kylep Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
inline CNFAState * CNFA::Get( unsigned iState )
{
    //
    // State numbers are 1-based: state N is stored in slot N-1 (see the
    // return statement), so 0 is not a valid argument.
    //

    if ( iState > _aState.Count() )
    {
        //
        // The array is too small for the requested state. Build a
        // replacement sized for it plus 10 slots of headroom.
        //

        unsigned cNewState = iState + 10;
        XArray<CNFAState> xState( cNewState );

        //
        // The index is declared ahead of both loops because the second
        // loop continues from where the first one stopped. Declaring it
        // in the first loop's init-statement only compiles under the
        // pre-standard for-scope rules.
        //

        unsigned i = 0;

        // Carry the existing states over to the new array.
        for ( ; i < _aState.Count(); i++ )
            xState[i].Init( _aState[i] );

        // Initialize each added slot with i+1, the 1-based number that
        // maps to that slot.
        for ( ; i < cNewState; i++ )
            xState[i].Init(i+1);

        // Release the old array and take over the new one.
        _aState.Free();
        _aState.Set( cNewState, xState.Acquire() );
    }

    return &_aState[ iState - 1 ];
}
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CNFA::StartState, public
|
||
|
//
|
||
|
// Returns: The start state.
|
||
|
//
|
||
|
// History: 20-Jan-92 Kylep Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
inline unsigned CNFA::StartState()
|
||
|
{
|
||
|
return( _iStart );
|
||
|
}
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CNFA::Translate, public
|
||
|
//
|
||
|
// Returns: The character translator.
|
||
|
//
|
||
|
// History: 20-Jan-92 Kylep Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
inline CXlatChar const & CNFA::Translate() const
{
    // Read-only view of the translator member; no copy is made.
    return _chars;
}
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CNFA::NumStates, public
|
||
|
//
|
||
|
// Returns: The count of states currently in the automata.
|
||
|
//
|
||
|
// History: 20-Jan-92 Kylep Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
inline unsigned CNFA::NumStates() const
|
||
|
{
|
||
|
return( _iNextState );
|
||
|
}
|
||
|
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CDFA::IsFinal, private
|
||
|
//
|
||
|
// Arguments: [state] -- Index of state.
|
||
|
//
|
||
|
// Returns: TRUE if state [state] is final.
|
||
|
//
|
||
|
// History: 20-Jan-92 Kylep Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
inline BOOLEAN CDFA::IsFinal( unsigned state )
{
    // Direct lookup in the per-state flag array; no range check is done
    // on [state] here.
    return _xStateFinal[state];
}
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CDFA::Move, private
|
||
|
//
|
||
|
// Arguments: [state] -- Index of state.
|
||
|
// [sym] -- Input symbol
|
||
|
//
|
||
|
// Returns: The new state reached from state [state] on an input
|
||
|
// symbol [sym].
|
||
|
//
|
||
|
// History: 20-Jan-92 Kylep Created
|
||
|
//
|
||
|
// Notes: If this function is ever changed to modify data, then
|
||
|
// you need to also investigate the locking in CDFA::Recognize.
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
inline unsigned CDFA::Move( unsigned state, unsigned sym ) const
|
||
|
{
|
||
|
return( _xStateTrans[state * (_nfa.Translate().NumClasses() + 1) + sym] );
|
||
|
}
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CDFA::AddTransition, private
|
||
|
//
|
||
|
// Effects: Adds a transition from state [state] on input symbol [sym]
|
||
|
// to state [newstate].
|
||
|
//
|
||
|
// Arguments: [state] -- Index of state.
|
||
|
// [sym] -- Input symbol.
|
||
|
// [newstate] -- Index of state
|
||
|
//
|
||
|
// History: 20-Jan-92 Kylep Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
inline void CDFA::AddTransition( unsigned state, unsigned sym, unsigned newstate )
|
||
|
{
|
||
|
_xStateTrans[ state * ( _nfa.Translate().NumClasses() + 1 ) + sym ] =
|
||
|
newstate;
|
||
|
}
|
||
|
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CDFA::IsComputed, private
|
||
|
//
|
||
|
// Arguments: [state] -- Index of state.
|
||
|
//
|
||
|
// Returns: TRUE if the DFA contains a transition mapping for state
|
||
|
// [state].
|
||
|
//
|
||
|
// History: 20-Jan-92 Kylep Created
|
||
|
//
|
||
|
// Notes: An uncomputed state is one for which IsFinal has not been
|
||
|
// computed. All other transitions are
|
||
|
// automatically set to stateUncomputed at allocation time.
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
inline BOOLEAN CDFA::IsComputed( unsigned state )
{
    //
    // A state beyond _cState is never computed. The table is consulted
    // only when the range test passes, so Move is not called with an
    // out-of-range state.
    //

    BOOLEAN fComputed = ( state <= _cState );

    //
    // The entry for symbol 0 serves as the per-state marker.
    // NOTE(review): the header comment above speaks of stateUncomputed,
    // while this test uses stateUndefined -- confirm the intended
    // sentinel.
    //

    if ( fComputed )
        fComputed = ( Move( state, 0 ) != stateUndefined );

    return fComputed;
}
|
||
|
|