windows-nt/Source/XPSP1/NT/inetsrv/query/qutil/querylib/scanner.cxx
2020-09-26 16:20:57 +08:00

1333 lines
36 KiB
C++

//+---------------------------------------------------------------------------
//
// Copyright (C) 1994-1998, Microsoft Corporation.
//
// File: SCANNER.CXX
//
// Contents: Implementation of CQueryScanner
//
// History: 22-May-92 AmyA Created.
// 23-Jun-92 MikeHew Added weight token recognition.
// 17-May-94 t-jeffc Added error info and reg ex support.
//
//----------------------------------------------------------------------------
#include <pch.cxx>
#pragma hdrstop
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::CQueryScanner, public
//
// Synopsis: Create a scanner from a string.
//
// Arguments: [buffer] -- the string to be scanned.
// [fLookForTextualKeywords] -- TRUE if the scanner should
// look for "and/or/not/near" in
// text form.
// [lcid] -- language for and/or/not/near detection
// [fTreatPlusAsToken] -- TRUE if the scanner should treat the
// '+' character as a token (used
// in GroupBy parsing)
//
// Notes: This string is not copied, so the scanner does not own it.
// If the string is changed outside of the scanner, it will
// affect the information that is returned.
//
// History: 30-Apr-92 AmyA Created
//
//----------------------------------------------------------------------------
// Builds a scanner over the caller-owned string [buffer] and immediately
// scans the first token.
CQueryScanner::CQueryScanner(
WCHAR const * buffer,
BOOL fLookForTextualKeywords,
LCID lcid,
BOOL fTreatPlusAsToken )
// _text, _pBuf and _pLookAhead all start at the head of the string; the
// string itself is not copied.
: _text( buffer ),
_pBuf( buffer ),
_pLookAhead( buffer ),
_fLookForTextualKeywords( fLookForTextualKeywords ),
_fTreatPlusAsToken( fTreatPlusAsToken ),
_lcid( lcid )
{
// Prime the scanner: Accept() assigns _token, which is not set by the
// initializer list above.
Accept();
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::AcceptWord, public
//
// Synopsis: Consumes a single word out of a phrase
//
// Requires: Should be called after AcqWord
//
// History: 15-Sep-92 BartoszM Created
//
//----------------------------------------------------------------------------
// Resumes token scanning from the current parse position (_text) rather
// than from the end of the previously scanned token.
void CQueryScanner::AcceptWord()
{
// Pull the look-ahead back to _text so that Accept() rescans from there.
_pLookAhead = _text;
Accept();
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::AcceptColumn, public
//
// Synopsis: Consumes a column name out of a phrase
//
// Requires: Should be called after AcqColumn
//
// History: 15-Sep-92 BartoszM Created
//
//----------------------------------------------------------------------------
// Consuming a column name is handled exactly like consuming a word.
void CQueryScanner::AcceptColumn()
{
AcceptWord();
}
//
// One textual keyword (such as "AND") together with the scanner token it
// stands for.  The keyword text is pointer-to-const: the tables in this file
// initialize it from string literals, which must never be written through.
//
struct SStringToken
{
    WCHAR const * pwcToken;   // null-terminated keyword text
    unsigned      cwc;        // keyword length in characters, terminator excluded
    Token         token;      // token reported when the keyword is found
};
//
// Keyword tables, one per supported language.  Each table holds its entries
// in the same order -- AND, OR, NOT, NEAR -- and has cStringTokens entries.
//
// QSCAN_STRING_TOKEN builds one entry and derives the character count from
// the literal itself (array size in characters minus the terminating null).
//
#define QSCAN_STRING_TOKEN( wsz, tok ) \
    { wsz, ( sizeof( wsz ) / sizeof( WCHAR ) ) - 1, tok }

static SStringToken s_EnglishStringTokens[] =
{
    QSCAN_STRING_TOKEN( L"AND",  AND_TOKEN ),
    QSCAN_STRING_TOKEN( L"OR",   OR_TOKEN ),
    QSCAN_STRING_TOKEN( L"NOT",  NOT_TOKEN ),
    QSCAN_STRING_TOKEN( L"NEAR", PROX_TOKEN ),
};

static SStringToken s_GermanStringTokens[] =
{
    QSCAN_STRING_TOKEN( L"UND",   AND_TOKEN ),
    QSCAN_STRING_TOKEN( L"ODER",  OR_TOKEN ),
    QSCAN_STRING_TOKEN( L"NICHT", NOT_TOKEN ),
    QSCAN_STRING_TOKEN( L"NAH",   PROX_TOKEN ),
};

static SStringToken s_FrenchStringTokens[] =
{
    QSCAN_STRING_TOKEN( L"ET",   AND_TOKEN ),
    QSCAN_STRING_TOKEN( L"OU",   OR_TOKEN ),
    QSCAN_STRING_TOKEN( L"SANS", NOT_TOKEN ),
    QSCAN_STRING_TOKEN( L"PRES", PROX_TOKEN ),
};

static SStringToken s_SpanishStringTokens[] =
{
    QSCAN_STRING_TOKEN( L"Y",     AND_TOKEN ),
    QSCAN_STRING_TOKEN( L"O",     OR_TOKEN ),
    QSCAN_STRING_TOKEN( L"NO",    NOT_TOKEN ),
    QSCAN_STRING_TOKEN( L"CERCA", PROX_TOKEN ),
};

static SStringToken s_DutchStringTokens[] =
{
    QSCAN_STRING_TOKEN( L"EN",    AND_TOKEN ),
    QSCAN_STRING_TOKEN( L"OF",    OR_TOKEN ),
    QSCAN_STRING_TOKEN( L"NIET",  NOT_TOKEN ),
    QSCAN_STRING_TOKEN( L"NABIJ", PROX_TOKEN ),
};

// The Swedish NEAR keyword contains the character 0xc4, so it is spelled
// out as an explicit character array and its length (4) is given by hand.
static WCHAR aSwedishNear[] = { L'N', 0xc4, L'R', L'A', 0 };

static SStringToken s_SwedishStringTokens[] =
{
    QSCAN_STRING_TOKEN( L"OCH",   AND_TOKEN ),
    QSCAN_STRING_TOKEN( L"ELLER", OR_TOKEN ),
    QSCAN_STRING_TOKEN( L"INTE",  NOT_TOKEN ),
    { aSwedishNear, 4, PROX_TOKEN },
};

static SStringToken s_ItalianStringTokens[] =
{
    QSCAN_STRING_TOKEN( L"E",      AND_TOKEN ),
    QSCAN_STRING_TOKEN( L"O",      OR_TOKEN ),
    QSCAN_STRING_TOKEN( L"NO",     NOT_TOKEN ),
    QSCAN_STRING_TOKEN( L"VICINO", PROX_TOKEN ),
};

#undef QSCAN_STRING_TOKEN

// Number of entries in every keyword table (measured on the English one).
const unsigned cStringTokens = sizeof(s_EnglishStringTokens) /
                               sizeof(s_EnglishStringTokens[0]);

// Characters that may legitimately follow a textual keyword.
#define WORD_STR L"{}!&|~*@#()[],\t=<>\n\"^ "
//+---------------------------------------------------------------------------
//
// Function: InternalFindStringToken
//
// Synopsis: Looks for a textual token in plain text.
//
// Arguments: [pwcIn] -- string to search
// [token] -- returns the token found
// [cwc] -- returns length of token found
// [pTokens] -- token array to use
//
// Returns: Pointer to token or 0 if none was found
//
// History: 08-Feb-96 dlee created
//
//----------------------------------------------------------------------------
WCHAR * InternalFindStringToken(
    WCHAR * pwcIn,
    Token & token,
    unsigned & cwc,
    SStringToken * pTokens )
{
    WCHAR * pwcBest = 0;    // earliest stand-alone keyword found so far

    for ( unsigned iTok = 0; iTok < cStringTokens; iTok++ )
    {
        SStringToken const & candidate = pTokens[ iTok ];

        // Step through successive occurrences of this keyword until one of
        // them stands alone as a word; only that occurrence is considered.
        for ( WCHAR * pwcHit = wcsstr( pwcIn, candidate.pwcToken );
              0 != pwcHit;
              pwcHit = wcsstr( pwcHit + 1, candidate.pwcToken ) )
        {
            // Right edge: end of string or one of the WORD_STR delimiters.
            WCHAR const wcAfter = pwcHit[ candidate.cwc ];
            if ( ( 0 != wcAfter ) && ( 0 == wcschr( WORD_STR, wcAfter ) ) )
                continue;

            // Left edge: start of string or white space.
            if ( ( pwcHit != pwcIn ) && !iswspace( pwcHit[ -1 ] ) )
                continue;

            // Keep this hit only if it lies before the best one from the
            // keywords already examined.
            if ( ( 0 == pwcBest ) || ( pwcHit < pwcBest ) )
            {
                pwcBest = pwcHit;
                token   = candidate.token;
                cwc     = candidate.cwc;
            }

            // Later occurrences of the same keyword cannot be earlier.
            break;
        }
    }

    return pwcBest;
} //InternalFindStringToken
// Maps the primary language of [lcid] to its keyword table.  Languages
// without a table of their own (including neutral and English) get the
// English table.
SStringToken * GetStringTokenArray( LCID lcid )
{
    // Start from the fallback and override it for the localized languages.
    SStringToken * pTable = s_EnglishStringTokens;

    switch ( PRIMARYLANGID( LANGIDFROMLCID( lcid ) ) )
    {
        case LANG_GERMAN :   pTable = s_GermanStringTokens;   break;
        case LANG_FRENCH :   pTable = s_FrenchStringTokens;   break;
        case LANG_SPANISH :  pTable = s_SpanishStringTokens;  break;
        case LANG_DUTCH :    pTable = s_DutchStringTokens;    break;
        case LANG_SWEDISH :  pTable = s_SwedishStringTokens;  break;
        case LANG_ITALIAN :  pTable = s_ItalianStringTokens;  break;

        case LANG_NEUTRAL :
        case LANG_ENGLISH :
        default :
            break;
    }

    Win4Assert( 0 != pTable );
    return pTable;
}
//+---------------------------------------------------------------------------
//
// Function: FindStringToken
//
// Synopsis: Looks for a textual token in plain text. Always tries
// English, tries a different language depending on _lcid.
//
// Arguments: [pwcIn] -- string to search
// [token] -- returns the token found
// [cwc] -- returns length of token found
//
// Returns: Pointer to token or 0 if none was found
//
// History: 08-Feb-96 dlee created
//
//----------------------------------------------------------------------------
WCHAR * CQueryScanner::FindStringToken(
    WCHAR * pwcIn,
    Token & token,
    unsigned & cwc )
{
    // First pass: the keyword table selected by the scanner's locale.
    SStringToken * const pLocalized = GetStringTokenArray( _lcid );
    WCHAR * const pwcLocalized = InternalFindStringToken( pwcIn, token, cwc, pLocalized );

    // If that table already was the English one there is nothing more to try.
    if ( pLocalized == s_EnglishStringTokens )
        return pwcLocalized;

    // Second pass: English keywords are honored in every locale.
    Token    tokenEnglish;
    unsigned cwcEnglish;
    WCHAR * const pwcEnglish = InternalFindStringToken( pwcIn,
                                                        tokenEnglish,
                                                        cwcEnglish,
                                                        s_EnglishStringTokens );

    // No English keyword at all: the localized result (possibly 0) stands.
    if ( 0 == pwcEnglish )
        return pwcLocalized;

    // A localized keyword at or before the English one wins.
    if ( ( 0 != pwcLocalized ) && ( pwcLocalized <= pwcEnglish ) )
        return pwcLocalized;

    // Otherwise report the English keyword.
    token = tokenEnglish;
    cwc   = cwcEnglish;
    return pwcEnglish;
} //FindStringToken
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::Accept, public
//
// Synopsis: Determines what the next token is. Will advance _pLookAhead
// over the next token and white space.
//
// Notes: There are five different types of TEXT_TOKENS, Phrase, Path,
// Number, Column and Command. Since the length of the token
// depends on which token it is, _pLookAhead is forwarded to the
// end of the longest, and _text is used to parse the token in the
// various Acq and Get methods.
//
// History: 30-Apr-92 AmyA Created
// 19-May-92 AmyA Added Guid hack
// 23-Jun-92 MikeHew Added weight token recognition.
// 26-May-94 t-jeffc Added more tokens; rearranged to
// support parsing errors
//
//----------------------------------------------------------------------------
// Scans the next token: skips white space, records the token start in _text,
// sets _token, and leaves _pLookAhead just past the token.
void CQueryScanner::Accept()
{
EatWhiteSpace();
// _text marks where the token starts; the Acq*/Get* methods parse from here.
_text = _pLookAhead;
switch ( *_pLookAhead )
{
// --- punctuation tokens; some take an optional second character ---
case '&':
_pLookAhead++;
_token = AND_TOKEN;
break;
case '*':
// "*" is FUZZY_TOKEN, "**" is FUZ2_TOKEN
_pLookAhead++;
if ( *_pLookAhead == '*' )
{
_token = FUZ2_TOKEN;
_pLookAhead++;
}
else
_token = FUZZY_TOKEN;
break;
case '=':
_pLookAhead++;
_token = EQUAL_TOKEN;
break;
case '<':
// "<" or "<="
_pLookAhead++;
if ( *_pLookAhead == '=' )
{
_token = LESS_EQUAL_TOKEN;
_pLookAhead++;
}
else
_token = LESS_TOKEN;
break;
case '>':
// ">" or ">="
_pLookAhead++;
if ( *_pLookAhead == '=' )
{
_token = GREATER_EQUAL_TOKEN;
_pLookAhead++;
}
else
_token = GREATER_TOKEN;
break;
case '!':
// "!=" is NOT_EQUAL_TOKEN, a lone "!" is NOT_TOKEN
_pLookAhead++;
if ( *_pLookAhead == '=' )
{
_token = NOT_EQUAL_TOKEN;
_pLookAhead++;
}
else
{
_token = NOT_TOKEN;
}
break;
case '|':
_pLookAhead++;
_token = OR_TOKEN;
break;
case '~':
_pLookAhead++;
_token = PROX_TOKEN;
break;
case '@':
_pLookAhead++;
_token = PROP_TOKEN;
break;
case '#':
_pLookAhead++;
_token = PROP_REGEX_TOKEN;
break;
case '(':
_pLookAhead++;
_token = OPEN_TOKEN;
break;
case ')':
_pLookAhead++;
_token = CLOSE_TOKEN;
break;
case '[':
_pLookAhead++;
_token = W_OPEN_TOKEN;
break;
case ']':
_pLookAhead++;
_token = W_CLOSE_TOKEN;
break;
case ',':
_pLookAhead++;
_token = COMMA_TOKEN;
break;
// End of string: _pLookAhead is deliberately NOT advanced, so every later
// Accept() reports EOS_TOKEN again.
case '\0':
case 0x1A: // CTRL-Z
_token = EOS_TOKEN;
break;
case '"':
_pLookAhead++;
_token = QUOTES_TOKEN;
break;
case '$':
_pLookAhead++;
_token = PROP_NATLANG_TOKEN;
break;
case '{':
_pLookAhead++;
_token = C_OPEN_TOKEN;
break;
case '}':
_pLookAhead++;
_token = C_CLOSE_TOKEN;
break;
case '^':
{
// "^a" and "^s" are two-character tokens (the match is case sensitive);
// any other '^' is not a token here and falls through to the text handling.
WCHAR wc = *(_pLookAhead + 1);
BOOL fOk = TRUE;
if (L'a' == wc) // all bits
_token = ALLOF_TOKEN;
else if (L's' == wc) // some bits
_token = SOMEOF_TOKEN;
else
fOk = FALSE;
if (fOk)
{
_pLookAhead += 2;
break;
}
}
// FALL THROUGH
case '+':
// The explicit '+' test is needed because the '^' case above can fall
// into this one.  '+' is a token only when the scanner was asked to treat
// it as one.
if (*_pLookAhead == L'+' && _fTreatPlusAsToken)
{
_pLookAhead++;
_token = PLUS_TOKEN;
break;
}
// FALL THROUGH
default:
{
// forwards pwcEnd over anything that could be in a phrase,
// which is the most inclusive of the TEXT_TOKENs.
// (except, for regex's and phrases in quotes - but they're
// handled separately)
WCHAR const *pwcEnd = _text + wcscspn( _text, PHRASE_STR );
if ( _fLookForTextualKeywords )
{
unsigned cwc = (unsigned) ( pwcEnd - _text );
cwc = __min( cwc, MAX_PATH * 2 );
// Only the first MAX_PATH * 2 characters of the phrase are searched for
// textual keywords; a keyword beyond that point is not recognized --
// the workaround is to use the '&|~' version.
WCHAR awcBuf[ 1 + MAX_PATH * 2 ];
RtlCopyMemory( awcBuf, _text, cwc * sizeof WCHAR );
awcBuf[ cwc ] = 0;
// Upper-case the copy in place so it can be compared against the
// upper-case keyword tables; the original query text is left untouched.
ULONG cwcOut = LCMapString( _lcid,
LCMAP_UPPERCASE,
awcBuf,
cwc,
awcBuf,
cwc );
// The search below maps offsets in awcBuf back onto _text, so the mapped
// length must equal the input length.
if ( cwcOut != cwc )
THROW( CException() );
Token token;
unsigned cwcToken;
WCHAR *pwcTok = FindStringToken( awcBuf, token, cwcToken );
if ( 0 != pwcTok )
{
// a textual token exists in the string
if ( pwcTok == awcBuf )
{
// textual token at the start of the string
_token = token;
_pLookAhead = _text + cwcToken;
}
else
{
// textual token in the middle of the string, stop the
// current token at that point and get it next time
// Accept() is called.
_pLookAhead = _text + ( pwcTok - awcBuf );
_token = TEXT_TOKEN;
}
}
else
{
// no textual keyword: the whole phrase is one TEXT_TOKEN
_pLookAhead = pwcEnd;
_token = TEXT_TOKEN;
}
}
else
{
// textual keywords are disabled: the whole phrase is one TEXT_TOKEN
_pLookAhead = pwcEnd;
_token = TEXT_TOKEN;
}
break;
}
}
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::AllocReturnString, private inline
//
// Synopsis: Copies all of the relevant characters of the string that
// _text is pointing to and returns the new string.
//
// History: 17 Apr 97 AlanW Created
//
//----------------------------------------------------------------------------
inline WCHAR * CQueryScanner::AllocReturnString( int cch )
{
    // Duplicate the first [cch] characters at _text into a fresh,
    // null-terminated buffer that the caller must free.
    WCHAR * const pwszCopy = new WCHAR [ cch + 1 ];
    RtlCopyMemory( pwszCopy, _text, cch * sizeof(WCHAR) );
    pwszCopy[ cch ] = L'\0';

    // Consume what was copied, plus any white space that follows it.
    for ( _text += cch; iswspace( *_text ); _text++ )
    {
    }

    return pwszCopy;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::AcqPath, public
//
// Synopsis: Copies all of the relevant characters of the string that
// _text is pointing to and returns the new string. Will
// return 0 if _text is at end of whole TEXT_TOKEN.
//
// Notes: Since the string is copied, the caller of this function is
// responsible for freeing the memory occupied by the string.
// This method can be called several times before calling
// Accept(), so many paths can be acquired if they exist in the
// scanner.
//
// History: 30-Apr-92 AmyA Created
//
//----------------------------------------------------------------------------
WCHAR * CQueryScanner::AcqPath()
{
    if ( !IsEndOfTextToken() )
    {
        // A path runs up to (not including) the first CMND_STR character.
        int const cchPath = wcscspn( _text, CMND_STR );
        return AllocReturnString( cchPath );
    }

    // Nothing left in the current text token.
    return 0;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::AcqWord, public
//
// Synopsis: Copies the word that _text is pointing to and returns the
// new string. Positions _text after the word and whitespace.
// Returns 0 if at the end of a TEXT_TOKEN.
//
// History: 29-Jun-92 MikeHew Created.
//
//----------------------------------------------------------------------------
WCHAR * CQueryScanner::AcqWord()
{
    if ( IsEndOfTextToken() )
        return 0;

    // A word ends at the first white-space character or at the end of the
    // current token, whichever comes first.
    WCHAR const * pwcStop;
    for ( pwcStop = _text;
          ( pwcStop < _pLookAhead ) && !iswspace( *pwcStop );
          pwcStop++ )
    {
    }

    unsigned const cwcWord = CiPtrToUint( pwcStop - _text );
    return AllocReturnString( cwcWord );
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::AcqColumn, public
//
// Synopsis: Copies a column name and returns the new string. A column
// name is either a single word, or a quoted string.
// Positions _text after the word and whitespace.
//
// Returns: WCHAR* pointer to column name. 0 if no column name found.
//
// History: 17 Apr 97 AlanW Created.
//
//----------------------------------------------------------------------------
WCHAR * CQueryScanner::AcqColumn()
{
    if ( QUOTES_TOKEN != _token )
    {
        // Unquoted name: everything up to the first COLUMN_STR character.
        if ( IsEndOfTextToken() )
            return 0;

        int const cchName = wcscspn( _text, COLUMN_STR );
        return AllocReturnString( cchName );
    }

    // Quoted name: step past the opening quote, take the quoted phrase, and
    // resume parsing where that phrase left the look-ahead.
    Accept();
    WCHAR * const pwszQuoted = AcqPhraseInQuotes();
    _text = _pLookAhead;
    return pwszQuoted;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::AcqPhrase, public
//
// Synopsis: Copies all of the relevant characters of the string that
// _text is pointing to and returns the new string.
// Returns 0 if at the end of a text token.
//
// Notes: Since the string is copied, the caller of this function is
// responsible for freeing the memory occupied by the string.
// The difference between this function and AcqPath is that this
// should only be called once before calling Accept().
//
// History: 30-Apr-92 AmyA Created
// 09-May-96 DwightKr Strip trailing white space
//
//----------------------------------------------------------------------------
WCHAR * CQueryScanner::AcqPhrase()
{
    if ( IsEndOfTextToken() )
        return 0;

    //
    //  The phrase spans [_text, _pLookAhead).  Drop trailing white space,
    //  but always keep at least the first character.
    //
    unsigned cwcPhrase = CiPtrToUint( _pLookAhead - _text );
    while ( ( cwcPhrase > 1 ) && iswspace( _text[ cwcPhrase - 1 ] ) )
        cwcPhrase--;

    // Hand back a null-terminated copy; _text is not advanced.
    WCHAR * const pwszPhrase = new WCHAR [ cwcPhrase + 1 ];
    RtlCopyMemory( pwszPhrase, _text, cwcPhrase * sizeof( WCHAR ) );
    pwszPhrase[ cwcPhrase ] = 0;
    return pwszPhrase;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::AcqRegEx, public
//
// Synopsis: Copies all of the relevant characters of the string that
// _text is pointing to and returns the new string. Matches
// the longest string possible - the only restriction is that
// the regex can not contain any of the characters in REGEX_STR
// outside of <> braces (which may be nested).
// Returns 0 if the regex is empty.
//
// Notes: Since the string is copied, the caller of this function is
// responsible for freeing the memory occupied by the string.
// Because some regex characters are duplicated in the query
// language, _pLookAhead is ignored (and actually reset) in
// this operation. Like AcqPhrase(), this should be called only
// once before Accept().
//
// History: 10-May-94 t-jeffc Created
//
//----------------------------------------------------------------------------
WCHAR * CQueryScanner::AcqRegEx()
{
    WCHAR const * pwcScan = _text;

    // A regex that opens with a quote runs to the matching closing quote.
    BOOL const fInQuotes = ( L'"' == *pwcScan );
    if ( fInQuotes )
        pwcScan++;

    //
    //  Find the end of the regex.
    //    quoted:   ends just past the closing quote; end of string is an error
    //    unquoted: ends at end of string, at a blank, or at a ')' that is
    //              neither the first character nor preceded by '|'
    //
    for ( BOOL fStop = FALSE; !fStop; )
    {
        WCHAR const wc = *pwcScan;

        if ( 0 == wc )
        {
            if ( fInQuotes )
                THROW( CException( QPARSE_E_UNEXPECTED_EOS ) );
            fStop = TRUE;
        }
        else if ( fInQuotes )
        {
            // Every character is consumed; the closing quote also ends the scan.
            pwcScan++;
            fStop = ( L'"' == wc );
        }
        else if ( L' ' == wc )
        {
            fStop = TRUE;
        }
        else if ( ( L')' == wc ) &&
                  ( pwcScan != _text ) &&
                  ( L'|' != pwcScan[ -1 ] ) )
        {
            fStop = TRUE;
        }
        else
        {
            pwcScan++;
        }
    }

    // Empty regex.
    if ( pwcScan == _text )
        return 0;

    // The regex, not the earlier token scan, decides where scanning resumes.
    _pLookAhead = pwcScan;

    // Copy the regex text, leaving out the surrounding quotes if present.
    unsigned cwcRegEx = CiPtrToUint( _pLookAhead - _text );
    WCHAR const * pwcFirst = _text;
    if ( fInQuotes )
    {
        Win4Assert( cwcRegEx >= 2 );
        cwcRegEx -= 2;
        pwcFirst++;
    }

    WCHAR * const pwszRegEx = new WCHAR[ cwcRegEx + 1 ];
    RtlCopyMemory( pwszRegEx, pwcFirst, cwcRegEx * sizeof( WCHAR ) );
    pwszRegEx[ cwcRegEx ] = 0;
    return pwszRegEx;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::AcqPhraseInQuotes, public
//
// Synopsis: Copies all characters until a matching " is found, or until
// the end of string. Embedded quotes are escaped with a quote:
// "Bill ""the man"" Gates"
//
// Notes: Since the string is copied, the caller of this function is
// responsible for freeing the memory occupied by the string.
//
// History: 18-Jan-95 SitaramR Created
// 3-Jul-96 dlee added embedded quotes
//
//----------------------------------------------------------------------------
WCHAR * CQueryScanner::AcqPhraseInQuotes()
{
    // Locate the closing quote (or the end of the string).  A doubled quote
    // is an escaped quote and is stepped over as a pair.
    WCHAR const * pwcClose = _text;
    while ( 0 != *pwcClose )
    {
        if ( L'"' == *pwcClose )
        {
            if ( L'"' != pwcClose[ 1 ] )
                break;              // lone quote: end of the phrase
            pwcClose++;             // first half of an escaped pair
        }
        pwcClose++;
    }

    // The raw length is an upper bound on the unescaped length.
    unsigned const cwcRaw = CiPtrToUint( pwcClose - _text );
    WCHAR * const pwszPhrase = new WCHAR [ cwcRaw + 1 ];

    // Copy one character at a time; whenever the character that follows is a
    // quote it is skipped.  Every quote inside [_text, pwcClose) belongs to
    // an escaped pair, so each pair collapses to a single quote.
    WCHAR * pwcDst = pwszPhrase;
    for ( WCHAR const * pwcSrc = _text; pwcSrc < pwcClose; )
    {
        *pwcDst++ = *pwcSrc++;
        if ( L'"' == *pwcSrc )
            pwcSrc++;
    }
    *pwcDst = 0;

    // Resume scanning after the closing quote, or at the end of the string
    // when the phrase was never closed.
    _pLookAhead = ( L'"' == *pwcClose ) ? pwcClose + 1 : pwcClose;

    return pwszPhrase;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::GetNumber, public
//
// Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE.
// If not, puts the ULONG from the scanner into number and
// returns TRUE.
//
// Arguments: [number] -- the ULONG which will be changed and passed back
// out as the ULONG from the scanner.
// [fAtEnd] -- returns TRUE if at the end of the scanned string
//
// Notes: May be called several times in a loop before Accept() is
// called.
//
// History: 11-May-92 AmyA Created
//
//----------------------------------------------------------------------------
// Parses an unsigned decimal or "0x"-prefixed hexadecimal number at _text.
//
// On success stores the value in [number], moves _text past the number and
// any white space after it, sets [fAtEnd] to whether the end of the string
// was reached, and returns TRUE.
//
// On failure returns FALSE and leaves _text where it was, matching the
// signed overloads, so the caller can retry the same text with a different
// parser.  [fAtEnd] is not modified on failure.
BOOL CQueryScanner::GetNumber( ULONG & number, BOOL & fAtEnd )
{
    // iswdigit() also rejects a leading '-', so no separate sign test is needed.
    if ( IsEndOfTextToken() || !iswdigit(*_text) )
        return FALSE;

    // Work on a local cursor; _text is only committed once parsing succeeded.
    WCHAR const * pwcDigits = _text;

    // is this a hex number?
    ULONG base = 10;
    if ( pwcDigits[0] == L'0' && ( pwcDigits[1] == L'x' || pwcDigits[1] == L'X' ) )
    {
        pwcDigits += 2;
        base = 16;
    }

    WCHAR * pwcStop = 0;
    number = wcstoul( pwcDigits, &pwcStop, base );

    // Reject when no digits were consumed (e.g. a bare "0x") or when the
    // digits are followed by '.', which makes this a real number.
    if ( ( pwcStop == pwcDigits ) ||
         ( L'.' == *pwcStop ) )
        return FALSE;

    // Commit: consume the number and the white space after it.
    _text = pwcStop;
    while ( iswspace(*_text) )
        _text++;

    fAtEnd = ( 0 == *_text );
    return TRUE;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::GetNumber, public
//
// Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE.
// If not, puts the LONG from the scanner into number and
// returns TRUE.
//
// Arguments: [number] -- the LONG which will be changed and passed back
// out as the LONG from the scanner.
// [fAtEnd] -- returns TRUE if at the end of the scanned string
//
// Notes: May be called several times in a loop before Accept() is
// called.
//
// History: 96-Jan-15 DwightKr Created
//
//----------------------------------------------------------------------------
BOOL CQueryScanner::GetNumber( LONG & number, BOOL & fAtEnd )
{
    // Remember the starting point so that a failed parse consumes nothing.
    WCHAR const * const pwcSaved = _text;

    // A leading '-' is consumed here; the magnitude is parsed as unsigned.
    BOOL const fMinus = ( L'-' == _text[0] );

    // Largest magnitude that fits: LONG_MAX, or one more for negative values.
    ULONG const ulLimit = fMinus ? ( (ULONG) LONG_MAX ) + 1
                                 : (ULONG) LONG_MAX;
    if ( fMinus )
        _text++;

    // Fails on a malformed number and on signed overflow/underflow alike.
    ULONG ulMagnitude;
    if ( !GetNumber( ulMagnitude, fAtEnd ) || ( ulMagnitude > ulLimit ) )
    {
        _text = pwcSaved;
        return FALSE;
    }

    if ( !fMinus )
        number = (LONG) ulMagnitude;
    else if ( ulMagnitude == ulLimit )
        number = LONG_MIN;          // the magnitude itself does not fit in a LONG
    else
        number = - (LONG) ulMagnitude;

    return TRUE;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::GetNumber, public
//
// Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE.
// If not, puts the unsigned _int64 from the scanner into number
// and returns TRUE.
//
// Arguments: [number] -- the unsigned _int64 which will be changed and
// passed back out as the unsigned _int64 from the scanner.
// [fAtEnd] -- returns TRUE if at the end of the scanned string
//
// Notes: May be called several times in a loop before Accept() is
// called.
//
// History: 27-Feb-96 dlee Created
//
//----------------------------------------------------------------------------
// Parses an unsigned decimal or "0x"-prefixed hexadecimal 64-bit number at
// _text.
//
// On success stores the value in [number], moves _text past the number and
// any white space after it, sets [fAtEnd] to whether the end of the string
// was reached, and returns TRUE.
//
// On failure returns FALSE and leaves _text where it was, matching the
// signed overloads, so the caller can retry the same text with a different
// parser.  [fAtEnd] is not modified on failure.
BOOL CQueryScanner::GetNumber( unsigned _int64 & number, BOOL & fAtEnd )
{
    // iswdigit() also rejects a leading '-', so no separate sign test is needed.
    if ( IsEndOfTextToken() || !iswdigit(*_text) )
        return FALSE;

    // Work on a local cursor; _text is only committed once parsing succeeded.
    WCHAR const * pwcDigits = _text;

    // is this a hex number?
    ULONG base = 10;
    if ( pwcDigits[0] == L'0' && ( pwcDigits[1] == L'x' || pwcDigits[1] == L'X' ) )
    {
        pwcDigits += 2;
        base = 16;
    }

    WCHAR * pwcStop = 0;
    number = _wcstoui64( pwcDigits, &pwcStop, base );

    // Reject when no digits were consumed (e.g. a bare "0x") or when the
    // digits are followed by '.', which makes this a real number.
    if ( ( pwcStop == pwcDigits ) ||
         ( L'.' == *pwcStop ) )
        return FALSE;

    // Commit: consume the number and the white space after it.
    _text = pwcStop;
    while ( iswspace(*_text) )
        _text++;

    fAtEnd = ( 0 == *_text );
    return TRUE;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::GetNumber, public
//
// Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE.
// If not, puts the _int64 from the scanner into number and
// returns TRUE.
//
// Arguments: [number] -- the _int64 which will be changed and passed back
// out as the _int64 from the scanner.
// [fAtEnd] -- returns TRUE if at the end of the scanned string
//
// Notes: May be called several times in a loop before Accept() is
// called.
//
// History: 27-Feb-96 dlee Created
//
//----------------------------------------------------------------------------
BOOL CQueryScanner::GetNumber( _int64 & number, BOOL & fAtEnd )
{
    // Remember the starting point so that a failed parse consumes nothing.
    WCHAR const * const pwcSaved = _text;

    // A leading '-' is consumed here; the magnitude is parsed as unsigned.
    BOOL const fMinus = ( L'-' == _text[0] );

    // Largest magnitude that fits: _I64_MAX, or one more for negative values.
    unsigned _int64 const ullLimit = fMinus ? ( (unsigned _int64) _I64_MAX ) + 1
                                            : (unsigned _int64) _I64_MAX;
    if ( fMinus )
        _text++;

    // Fails on a malformed number and on signed overflow/underflow alike.
    unsigned _int64 ullMagnitude;
    if ( !GetNumber( ullMagnitude, fAtEnd ) || ( ullMagnitude > ullLimit ) )
    {
        _text = pwcSaved;
        return FALSE;
    }

    if ( !fMinus )
        number = (_int64) ullMagnitude;
    else if ( ullMagnitude == ullLimit )
        number = _I64_MIN;          // the magnitude itself does not fit in an _int64
    else
        number = -((_int64) ullMagnitude);

    return TRUE;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::GetNumber, public
//
// Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE.
// If not, puts the double from the scanner into number and
// returns TRUE.
//
// Arguments: [number] -- the double which will be changed and passed back
// out as the double from the scanner.
//
// Notes: May be called several times in a loop before Accept() is
// called.
//
// History: 96-Jan-15 DwightKr Created
//
//----------------------------------------------------------------------------
BOOL CQueryScanner::GetNumber( double & number )
{
    // Only text that starts with a digit is treated as a number.
    if ( IsEndOfTextToken() )
        return FALSE;
    if ( !iswdigit(*_text) )
        return FALSE;

    int const cConverted = swscanf( _text, L"%lf", &number );
    if ( 1 != cConverted )
        return FALSE;

    // NOTE(review): _text still points at the first digit here, so this loop
    // does not move it and the number itself is never consumed -- unlike the
    // integer overloads.  Behavior is kept as is; confirm what callers expect.
    for ( ; 0 != iswspace(*_text); _text++ )
    {
    }

    return TRUE;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::GetCommandChar, public
//
// Synopsis: Returns the command character pointed to by _text and advances
// _text. If the command can't be uniquely determined by the
// first character, each subsequent call will return the next
// character in the word. After the command has been determined,
// AcceptCommand() should be called and then operand parsing may begin.
//
// History: 14-May-92 AmyA Created
// 16-May-94 t-jeffc Returns one character at a time to
// support more commands
//
//----------------------------------------------------------------------------
WCHAR CQueryScanner::GetCommandChar()
{
    // No characters left in the current text token.
    if ( IsEndOfTextToken() )
        return 0;

    // Consume one character and report it in lower case.
    WCHAR const wcRaw = *_text;
    ++_text;
    return towlower( wcRaw );
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::AcceptCommand, public
//
// Synopsis: Advances _text past any characters in the command.
// Used when enough command characters have been
// read to uniquely determine the command and begin parsing
// the operands.
//
// History: 16-May-94 t-jeffc Created
//
//----------------------------------------------------------------------------
void CQueryScanner::AcceptCommand()
{
    // Skip the rest of the command word: everything up to the first
    // character that appears in CMND_STR.
    _text += wcscspn( _text, CMND_STR );

    // Scan the next token starting right after the command.
    _pLookAhead = _text;
    Accept();
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::ResetBuffer, public
//
// Synopsis: Puts a new string into _pBuf and resets _pLookAhead
// accordingly.
//
// Arguments: [buffer] -- the new string for _pBuf
//
// History: 05-May-92 AmyA Created
//
//----------------------------------------------------------------------------
void CQueryScanner::ResetBuffer( WCHAR const * buffer )
{
    // Point the scanner at the new string and rewind the look-ahead to its
    // beginning; Accept() then scans the first token (and resets _text).
    _pLookAhead = _pBuf = buffer;
    Accept();
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::EatWhiteSpace, private
//
// Synopsis: Advances _pLookAhead past any white space in the string.
//
// History: 29-Apr-92 AmyA Created
//
//----------------------------------------------------------------------------
void CQueryScanner::EatWhiteSpace()
{
    // Advance the look-ahead to the first non-white-space character; the
    // terminating null is not white space, so the loop stops there at the latest.
    for ( ; 0 != iswspace(*_pLookAhead); _pLookAhead++ )
    {
    }
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::IsEndOfTextToken, private
//
// Synopsis: Returns TRUE if the current token is not a TEXT_TOKEN or
// if the string starting at _text to _pLookAhead contains
// nothing but whitespace.
//
// History: 27-May-94 t-jeffc Created
//
//----------------------------------------------------------------------------
BOOL CQueryScanner::IsEndOfTextToken()
{
    // Text remains only while the current token is a TEXT_TOKEN and the
    // parse position has not yet caught up with the look-ahead.
    BOOL const fTextRemains = ( TEXT_TOKEN == _token ) &&
                              ( _text < _pLookAhead );
    return !fTextRemains;
}
//+---------------------------------------------------------------------------
//
// Member: CQueryScanner::AcqLine, public
//
// Synopsis: Copies all of the remaining characters on the line;
// return 0 if _text is at end of whole TEXT_TOKEN.
//
// Arguments: [fParseQuotes] -- if TRUE, initial and final quotes are removed
//
// Notes: Since the string is copied, the caller of this function is
// responsible for freeing the memory occupied by the string.
// This method can be called several times before calling
// Accept(), so many paths can be acquired if they exist in the
// scanner.
//
// History: 96-Jan-03 DwightKr Created
// 96-Feb-26 DwightKr Allow lines to be quoted
//
//----------------------------------------------------------------------------
// Returns a newly allocated copy of the rest of the string starting at
// _text, with trailing white space/control characters removed and, when
// [fParseQuotes] is TRUE, a leading quote (and a matching trailing one, if
// present) stripped.  Returns 0 if _text is at the end of the string.  The
// caller owns the returned buffer.
WCHAR * CQueryScanner::AcqLine( BOOL fParseQuotes )
{
    if ( *_text == L'\0' )
        return 0;

    unsigned cwcBuffer = wcslen(_text);

    //
    // If there are \r, \n, or other white space at the end of the string,
    // strip it off.  Everything at or below L' ' counts, so this can reduce
    // the length to zero.
    //
    while ( cwcBuffer > 0 && _text[cwcBuffer-1] <= L' ' )
        cwcBuffer--;

    if ( fParseQuotes )
    {
        //
        // If there is a pair of quotes delimiting this line, strip them off.
        // The opening quote is removed even when there is no closing quote.
        //
        if ( (L'"' == _text[0]) && (cwcBuffer > 1) )
        {
            if ( L'"' == _text[cwcBuffer-1] )
                cwcBuffer--;

            _text++;
            cwcBuffer--;
        }
    }

    WCHAR *pText = new WCHAR [ cwcBuffer + 1 ];
    RtlCopyMemory( pText, _text, cwcBuffer * sizeof(WCHAR) );
    pText[cwcBuffer] = 0;

    //
    // Leave the look-ahead on the last character that was copied.  When
    // nothing was copied there is no such character: stay at _text instead of
    // stepping to _text - 1, which can lie before the start of the buffer.
    //
    // NOTE(review): the look-ahead ends up ON the last copied character, not
    // one past it; that is preserved here -- confirm callers rely on it.
    //
    if ( cwcBuffer > 0 )
        _pLookAhead = _text + cwcBuffer - 1;
    else
        _pLookAhead = _text;

    return pText;
} //AcqLine