windows-nt/Source/XPSP1/NT/ds/adsi/router/sql/lexer.hxx

326 lines
11 KiB
C++
Raw Normal View History

2020-09-26 03:20:57 -05:00
/*++
Copyright (c) 1995 Microsoft Corporation
Module Name:
lexer.hxx
Abstract:
This file exports the class CLexer and other declarations
that recognize the tokens in the string representation of the search
filter. The format of the search filter follows the specification of the
Minimal SQL Grammar, which is a subset of ANSI SQL 92.
Author:
Shankara Shastry [ShankSh] 13-Dec-1996
*/
/*
#include <stdlib.h>
#include <nt.h>
#include <ntrtl.h>
#include <nturtl.h>
#include <windows.h>
#include <wtypes.h>
*/
#include "dswarn.h"
#include "..\..\..\include\procs.hxx"
//
// Size of the chunk by which the lexeme buffer is allocated each time more
// memory is needed.
// NOTE(review): whether this counts WCHARs or bytes is decided by the CLexeme
// implementation, which is not part of this header -- confirm there.
//
#define LEXEME_UNIT_LENGTH 256
//
// Allowable tokens in the search string
//
// Each TOKEN_xxx value is exactly 100 (FINAL_STATES_BEGIN) less than the
// STATE_xxx final state of the same name defined later in this header, so
// the two lists must be kept in step when a token is added or renumbered.
//
// Error / operators / punctuation
#define TOKEN_ERROR 0
#define TOKEN_EQ 1
#define TOKEN_STAR 2
#define TOKEN_LPARAN 3
#define TOKEN_RPARAN 4
// Literals and names
#define TOKEN_INTEGER_LITERAL 5
#define TOKEN_REAL_LITERAL 6
#define TOKEN_STRING_LITERAL 7
#define TOKEN_USER_DEFINED_NAME 8
#define TOKEN_COMMA 9
// Relational operators
#define TOKEN_LT 10
#define TOKEN_GT 11
#define TOKEN_LE 12
#define TOKEN_GE 13
#define TOKEN_NE 14
// Keywords (see gKWTable / gKW2Token for the spelling-to-token mapping)
#define TOKEN_SELECT 15
#define TOKEN_ALL 16
#define TOKEN_FROM 17
#define TOKEN_WHERE 18
#define TOKEN_BOOLEAN_LITERAL 19
#define TOKEN_AND 20
#define TOKEN_OR 21
#define TOKEN_NOT 22
#define TOKEN_ORDER 23
#define TOKEN_BY 24
#define TOKEN_ASC 25
#define TOKEN_DESC 26
// End of the input string
#define TOKEN_END 27
// NOTE: TOKEN_START has the same numeric value as TOKEN_ERROR (0), so the two
// cannot be told apart by value.
#define TOKEN_START 0
// Size, in WCHARs, of one row of the keyword table (it is the second
// dimension of CLexer::_pKeywordTable). It has to be at least the length of
// the longest keyword plus one for the terminating null.
#define MAX_KEYWORD_LEN 20
// Initializer list holding the reserved words of the grammar. The list ends
// with an empty string, and entry i corresponds to entry i of gKW2Token, so
// the two lists must be edited together.
// NOTE(review): presumably used to initialize CLexer::_pKeywordTable; whether
// keywords are matched case-insensitively is decided by the lookup code,
// which is not part of this header.
#define gKWTable { \
L"SELECT", \
L"ALL", \
L"FROM", \
L"WHERE", \
L"AND", \
L"OR", \
L"NOT", \
L"TRUE", \
L"FALSE", \
L"ON", \
L"OFF", \
L"YES", \
L"NO", \
L"ORDER", \
L"BY", \
L"ASC", \
L"DESC", \
L"" }
// Initializer list mapping each keyword to its token. Entry i is the token
// for entry i of gKWTable: TRUE, FALSE, ON, OFF, YES and NO all map to
// TOKEN_BOOLEAN_LITERAL, and the trailing TOKEN_ERROR lines up with the empty
// string that terminates gKWTable.
// NOTE(review): presumably used to initialize CLexer::_pKW2Token -- confirm
// in the implementation file.
#define gKW2Token {\
TOKEN_SELECT, \
TOKEN_ALL, \
TOKEN_FROM, \
TOKEN_WHERE, \
TOKEN_AND, \
TOKEN_OR, \
TOKEN_NOT, \
TOKEN_BOOLEAN_LITERAL, \
TOKEN_BOOLEAN_LITERAL, \
TOKEN_BOOLEAN_LITERAL, \
TOKEN_BOOLEAN_LITERAL, \
TOKEN_BOOLEAN_LITERAL, \
TOKEN_BOOLEAN_LITERAL, \
TOKEN_ORDER, \
TOKEN_BY, \
TOKEN_ASC, \
TOKEN_DESC, \
TOKEN_ERROR }
//
// Final states of the DFA.
//
// Every final state is numbered FINAL_STATES_BEGIN (100) plus the value of
// the TOKEN_xxx of the same name, e.g. STATE_EQ (101) <-> TOKEN_EQ (1).
// Only 100-114 and 127 occur as next-state values in gStateTable; the keyword
// states 115-126 never appear there.
// NOTE(review): keywords are presumably recognized by looking a scanned name
// up in the keyword table rather than by the DFA itself -- confirm in
// CLexer::GetTokenFromState.
//
#define STATE_ERROR 100
#define STATE_EQ 101
#define STATE_STAR 102
#define STATE_LPARAN 103
#define STATE_RPARAN 104
#define STATE_INTEGER_LITERAL 105
#define STATE_REAL_LITERAL 106
#define STATE_STRING_LITERAL 107
#define STATE_USER_DEFINED_NAME 108
#define STATE_COMMA 109
#define STATE_LT 110
#define STATE_GT 111
#define STATE_LE 112
#define STATE_GE 113
#define STATE_NE 114
#define STATE_SELECT 115
#define STATE_ALL 116
#define STATE_FROM 117
#define STATE_WHERE 118
#define STATE_BOOLEAN_LITERAL 119
#define STATE_AND 120
#define STATE_OR 121
#define STATE_NOT 122
#define STATE_ORDER 123
#define STATE_BY 124
#define STATE_ASC 125
#define STATE_DESC 126
#define STATE_END 127
// Row 0 of gStateTable: the state in which scanning of a token begins.
#define START_STATE 0
// Lowest-numbered final state; the non-final states are the rows
// 0 .. MAX_DFA_STATES-1 of gStateTable.
#define FINAL_STATES_BEGIN 100
// Number of non-final states, i.e. the number of rows (0-11) in gStateTable.
#define MAX_DFA_STATES 12 // No. of states in the DFA
// No. of different groups of characters for which the DFA behaves differently
// For eg., all alphabetical characters generate the same behaviour and can be
// considered the same as far as the DFA is concerned. This is mainly to reduce
// the size of the table. It is the number of columns (0-16) in gStateTable.
#define MAX_CHAR_CLASSES 17
// Index of the catch-all character class, which covers all other characters
// not mentioned explicitly. CLexer::GetCharClass also returns it for every
// character code of 256 and above.
#define OTHER_CHAR_CLASS 14
// Various actions associated with a particular entry in the DFA table (the
// second member of each {next state, action} pair in gStateTable).
#define ACTION_DEFAULT 0
// Used in gStateTable for the quote that opens a string literal, for
// whitespace seen in the start state, and for a quote seen inside a string
// literal.
// NOTE(review): presumably the character is consumed without being added to
// the lexeme -- confirm in CLexer::PerformAction.
#define ACTION_IGNORE_ESCAPECHAR 1
// Used in gStateTable on transitions taken because of a character that does
// not belong to the token being finished (and on '\0' inside an unfinished
// number or string).
// NOTE(review): presumably the character is handed back to the input so the
// next scan sees it again -- confirm in CLexer::PerformAction.
#define ACTION_PUSHBACK_CHAR 2
/* The state transition table is a table Table[i,j] with i being the current
state and j being the input sets and the value Table[i,j] being the structure
containing the next state and the action id to be performed. State 0 and 1 are
the starting states when recognizing AttrType and AttrVal respectively.
NOTE(review): the sentence about states 0 and 1 does not match the table
below, where START_STATE is 0 and state 1 is the state entered after '<';
it looks inherited from another lexer -- confirm before relying on it.

Each cell is {next state, action id}. A next state of 100 or more is one of
the STATE_xxx final states; the action id is one of the ACTION_xxx values.

Rows (current state), as encoded by the transitions below:
   0  start of a token (whitespace loops back to 0)
   1  after '<'     : '>' gives NE, '=' gives LE, anything else gives LT
   2  after '>'     : '=' gives GE, anything else gives GT
   3  inside a user defined name
   4  after a leading '+' or '-' : only a digit is accepted
   5  integer digits             : '.' goes to 6, E/e goes to 8
   6  after '.'                  : only a digit is accepted
   7  fraction digits            : E/e goes to 8
   8  after the exponent E/e     : only a digit is accepted
   9  exponent digits
  10  inside a quoted string     : '\0' here is an error
  11  after a quote inside a string : a second quote returns to 10,
      anything else ends the string literal

Columns (character class, see gCharClassTable):
   0 '<'   1 '>'   2 '='   3 '*'   4 '('   5 ')'   6 '+'   7 '-'
   8 alpha-{e,E}   9 digit   10 '.'   11 '''   12 {E,e}
  13 'space'   14 'other'   15 '\0'   16 ','  */
/* '<' '>' '=' '*' '(' ')' '+' '-' alpha-{e,E} digit '.' ''' {E,e} 'space' 'other' '\0' ',' */
#define gStateTable {\
/* 0 */ {{ 1, 0}, { 2, 0}, {101,0}, {102,0}, {103,0}, {104,0}, { 4, 0}, { 4, 0}, { 3, 0}, { 5, 0}, { 6, 0}, {10, 1}, { 3, 0}, { 0 ,1}, {100,0}, {127,0}, {109,0}}, \
/* 1 */ {{110,2}, {114,0}, {112,0}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}}, \
/* 2 */ {{111,2}, {111,2}, {113,0}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}}, \
/* 3 */ {{108,2}, {108,2}, {108,2}, { 3,0}, {108,2}, {108,2}, { 3,0}, { 3,0}, { 3, 0}, { 3, 0}, { 3,0}, { 3,0}, { 3, 0}, {108,2}, {108,2}, {108,2}, {108,2}}, \
/* 4 */ {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, { 5, 0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,2}, {100,0}}, \
/* 5 */ {{105,2}, {105,2}, {105,2}, {105,2}, {105,2}, {105,2}, {105,2}, {105,2}, {105,2}, { 5, 0}, { 6, 0}, {105,2}, { 8, 0}, {105,2}, {105,2}, {105,2}, {105,2}}, \
/* 6 */ {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, { 7, 0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,2}, {100,0}}, \
/* 7 */ {{106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, { 7, 0}, {106,2}, {106,2}, { 8, 0}, {106,2}, {106,2}, {106,2}, {106,2}}, \
/* 8 */ {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, { 9, 0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,2}, {100,0}}, \
/* 9 */ {{106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, { 9, 0}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}}, \
/* 10*/ {{ 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 11,1}, { 10,0}, { 10,0}, { 10,0}, {100,2}, { 10,0}}, \
/* 11*/ {{107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, { 10,0}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}}}
// This is the table containing the character class to which a particular
// character belongs. This is used to index the state transition table.
// Basically, for each of the characters possible, this points to one of the
// columns in the state transition table defined above.
// Most of them are 14 indicating that they are 'other'
//
// The table has 256 entries (16 rows of 16), indexed by character code 0-255;
// CLexer::GetCharClass maps every code of 256 and above to OTHER_CHAR_CLASS
// without consulting it. Entries that are not 'other':
//   code 0                   -> 15 ('\0')
//   codes 9-13 and 32        -> 13 ('space': tab, LF, VT, FF, CR, blank)
//   ' ( ) * + , - .          -> 11, 4, 5, 3, 6, 16, 7, 10
//   '0'-'9'                  -> 9
//   < = >                    -> 0, 2, 1
//   'A'-'Z', 'a'-'z'         -> 8, except 'E' and 'e' -> 12
// Note that '_' (code 95) is in the 'other' class.
#define gCharClassTable { \
15, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 14, 14, \
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
13, 14, 14, 14, 14, 14, 14, 11, 4, 5, 3, 6, 16, 7, 10, 14, \
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 14, 14, 0, 2, 1, 14, \
14, 8, 8, 8, 8, 12, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 14, 14, 14, 14, 14, \
14, 8, 8, 8, 8, 12, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 14, 14, 14, 14, 14, \
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
}
// One cell of the DFA transition table: the state to move to and the action
// to carry out for the character that caused the move. It stays a plain
// aggregate so that it can be brace-initialized from gStateTable.
struct DFA_STATE
{
    DWORD dwNextState;  // next row of the table, or a STATE_xxx final state
    DWORD dwActionId;   // one of the ACTION_xxx values
};
//
// CLexeme maintains the lexeme (the text) corresponding to the current token.
//
// NOTE(review): the class holds a raw buffer pointer and declares a
// destructor but no copy constructor / assignment operator; whether copying
// an instance is safe depends on the implementation file -- confirm there.
//
class CLexeme
{
public:
    CLexeme();

    // Adds wcNextChar to the lexeme and reports the outcome as an HRESULT.
    // NOTE(review): presumably grows the buffer in LEXEME_UNIT_LENGTH steps
    // and can fail on allocation -- the body is not part of this header.
    HRESULT
    PushNextChar(
        WCHAR wcNextChar);

    ~CLexeme();

    // Rewinds the write position to the start of the buffer. Only the index
    // is reset; the buffer pointer and its recorded length are left alone.
    void
    ResetLexeme() { _dwIndex = 0; }

    // Returns the internal buffer pointer itself (not a copy).
    // Declared without the redundant "CLexeme::" qualifier: an extra
    // qualification on an in-class member declaration is ill-formed in
    // standard C++ and is rejected by conforming compilers.
    LPWSTR
    GetLexeme() { return (_pszLexeme); }

private:
    LPWSTR _pszLexeme;      // buffer holding the lexeme text
    DWORD  _dwMaxLength;    // NOTE(review): presumably the current buffer capacity
    DWORD  _dwIndex;        // position reset to 0 by ResetLexeme()
};
//
// CLexer maintains all the state information and returns the next token.
//
// All member declarations are written without the redundant "CLexer::"
// qualifier: an extra qualification on an in-class member declaration is
// ill-formed in standard C++ and is rejected by conforming compilers. The
// out-of-class definitions in the implementation file are unaffected.
//
class CLexer
{
public:
    // Initialize the lexer with the string szBuffer.
    CLexer(LPWSTR szBuffer);

    ~CLexer();

    // Return the next token and its value.
    //   szToken  - receives the text of the token
    //   pdwToken - receives one of the TOKEN_xxx values
    HRESULT
    GetNextToken(LPWSTR *szToken, LPDWORD pdwToken);

    // Same out-parameters as GetNextToken.
    // NOTE(review): presumably reports the most recently scanned token
    // without advancing -- confirm in the implementation file.
    HRESULT
    GetCurrentToken(
        LPWSTR *ppszToken,
        LPDWORD pdwToken
        );

private:
    // NOTE(review): presumably returns the character at _ptr and advances;
    // the body is not part of this header.
    WCHAR
    NextChar();

    // NOTE(review): presumably undoes the last NextChar(); the body is not
    // part of this header.
    void
    PushbackChar();

    // Maps a character to its character class, i.e. to a column of the
    // state transition table.
    DWORD
    GetCharClass(WCHAR wc) {
        if(wc < 256)
            return (_pCharClassTable[wc]);
        else
            // some unicode character; put in the other class.
            return (OTHER_CHAR_CLASS);
    }

    // Given the currentState reached and the character just scanned and the
    // action id, perform the action
    HRESULT
    PerformAction(
        DWORD dwCurrState,
        WCHAR wcCurrChar,
        DWORD dwActionId
        );

    // Translates a DFA state into a TOKEN_xxx value.
    // NOTE(review): the exact mapping (including keyword lookup) lives in the
    // implementation file.
    DWORD
    GetTokenFromState(
        DWORD dwCurrState
        );

    // The common DFA state transition table for all the instances of the class
    static DFA_STATE _pStateTable[][MAX_CHAR_CLASSES];

    // The common table mapping the characters to the character classes.
    static DWORD _pCharClassTable[];

    // The table to hold the keywords
    static WCHAR _pKeywordTable[][MAX_KEYWORD_LEN];

    // NOTE(review): presumably the token for each row of _pKeywordTable
    // (see gKW2Token) -- confirm in the implementation file.
    static DWORD _pKW2Token[];

    LPWSTR _Buffer;         // String being analysed
    LPWSTR _ptr;            // pointer to the next character to be analysed.
    DFA_STATE _currState;   // maintains the state information for the DFA
    DWORD _dwState;         // maintains the state information for the DFA
    DWORD _dwEndofString;   // To indicate end of pattern
    CLexeme _lexeme;        // text of the token being scanned
    DWORD _dwStateSave;     // maintains the state information for the DFA
    BOOL _bInitialized;
};