//+--------------------------------------------------------------------------- // // Copyright (C) 1996, Microsoft Corporation. // // File: tokstr.cxx // // Contents: Used to break down a string into its tokens // // History: 96/Feb/13 DwightKr Created // //---------------------------------------------------------------------------- #include #pragma hdrstop //+--------------------------------------------------------------------------- // // Method: CTokenizeString::CTokenizeString - public constructor // // History: 96/Jan/23 DwightKr Created // //---------------------------------------------------------------------------- CTokenizeString::CTokenizeString( WCHAR const * wcsString ) : _wcsString(wcsString), _wcsCurrentToken(wcsString), _wcsNextToken(wcsString) { Accept(); } //+--------------------------------------------------------------------------- // // Method: CTokenizeString::Accept - public // // History: 96/Jan/23 DwightKr Created // //---------------------------------------------------------------------------- void CTokenizeString::Accept() { EatWhiteSpace(); _wcsCurrentToken = _wcsNextToken; switch ( *_wcsCurrentToken ) { case L'"': _wcsNextToken++; _token = QUOTES_TOKEN; break; case L'{': _wcsNextToken++; _token = C_OPEN_TOKEN; break; case L'}': _wcsNextToken++; _token = C_CLOSE_TOKEN; break; case L',': _wcsNextToken++; _token = COMMA_TOKEN; break; case 0: _token = EOS_TOKEN; break; default: _wcsNextToken = _wcsCurrentToken + wcscspn( _wcsCurrentToken, WORD_STR ); _token = TEXT_TOKEN; break; } } //+--------------------------------------------------------------------------- // // Member: CTokenizeString:AcqWord, public // // Synopsis: Copies the word that _wcsCurrentToken is pointing to and // returns the new string. Positions _wcsCurrentToken after // the word and whitespace. Returns 0 if at the end of a // TEXT_TOKEN. // // History: 96-Feb-13 DwightKr Created. // //---------------------------------------------------------------------------- WCHAR * CTokenizeString::AcqWord() { if ( IsEndOfTextToken() ) return 0; WCHAR const * pEnd = _wcsNextToken; int cwcToken = (int)(pEnd - _wcsCurrentToken + 1); WCHAR * newBuf = new WCHAR [ cwcToken ]; RtlCopyMemory( newBuf, _wcsCurrentToken, cwcToken * sizeof(WCHAR)); newBuf[cwcToken-1] = 0; _wcsCurrentToken = pEnd; while ( iswspace(*_wcsCurrentToken) ) _wcsCurrentToken++; return newBuf; } //+--------------------------------------------------------------------------- // // Member: CTokenizeString::GetNumber, public // // Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE. // If not, puts the unsigned _int64 from the scanner into number // and returns TRUE. // // Arguments: [number] -- the unsigned _int64 which will be changed and // passed back out as the ULONG from the scanner. // // Notes: May be called several times in a loop before Accept() is // called. // // History: 96-Feb-13 AmyA Created // //---------------------------------------------------------------------------- BOOL CTokenizeString::GetNumber( unsigned _int64 & number ) { ULONG base = 10; WCHAR const * wcsCurrentToken = _wcsCurrentToken; if ( IsEndOfTextToken() || !iswdigit(*_wcsCurrentToken) || (*_wcsCurrentToken == L'-') ) { return FALSE; } if ( _wcsCurrentToken[0] == L'0' && (_wcsCurrentToken[1] == L'x' || _wcsCurrentToken[1] == L'X')) { _wcsCurrentToken += 2; base = 16; } number = _wcstoui64( _wcsCurrentToken, (WCHAR **)(&_wcsCurrentToken), base ); // // looks like a real number? // if ( ( wcsCurrentToken == _wcsCurrentToken ) || ( L'.' == *_wcsCurrentToken ) ) { _wcsCurrentToken = wcsCurrentToken; return FALSE; } while ( iswspace(*_wcsCurrentToken) ) _wcsCurrentToken++; return TRUE; } //+--------------------------------------------------------------------------- // // Member: CTokenizeString::GetNumber, public // // Synopsis: If _wcsCurrentToken is at the end of the TEXT_TOKEN, returns FALSE. // If not, puts the _int64 from the scanner into number and // returns TRUE. // // Arguments: [number] -- the _int64 which will be changed and passed back // out as the _int64 from the scanner. // // Notes: May be called several times in a loop before Accept() is // called. // // History: 96-Feb-13 DwightKr Created // //---------------------------------------------------------------------------- BOOL CTokenizeString::GetNumber( _int64 & number ) { WCHAR *text = (WCHAR *) _wcsCurrentToken; BOOL IsNegative = FALSE; if ( L'-' == _wcsCurrentToken[0] ) { IsNegative = TRUE; _wcsCurrentToken++; } unsigned _int64 ui64Number; if ( !GetNumber( ui64Number ) ) { _wcsCurrentToken = text; return FALSE; } if ( IsNegative ) { if ( ui64Number > 0x8000000000000000L ) { _wcsCurrentToken = text; return FALSE; } number = -((_int64) ui64Number); } else { number = (_int64) ui64Number; } return TRUE; } //+--------------------------------------------------------------------------- // // Member: CTokenizeString::GetNumber, public // // Synopsis: If _wcsCurrentToken is at the end of the TEXT_TOKEN, returns FALSE. // If not, puts the LONG from the scanner into number and // returns TRUE. // // Arguments: [number] -- the double which will be changed and passed back // out as the double from the scanner. // // Notes: May be called several times in a loop before Accept() is // called. // // History: 96-Feb-13 DwightKr Created // //---------------------------------------------------------------------------- BOOL CTokenizeString::GetNumber( double & number ) { if ( IsEndOfTextToken() || ((L'-' != *_wcsCurrentToken) && (iswdigit(*_wcsCurrentToken) == 0) ) ) { return FALSE; } if ( swscanf( _wcsCurrentToken, L"%lf", &number ) != 1 ) { return FALSE; } while ( iswspace(*_wcsCurrentToken) != 0 ) _wcsCurrentToken++; return TRUE; } //+--------------------------------------------------------------------------- // // Member: CTokenizeString::GetGUID, public // // Synopsis: If _wcsCurrentToken is at the end of the TEXT_TOKEN, returns FALSE. // If not, puts the guid into guid & returns TRUE; // // Arguments: [guid] -- the guid which will be changed and passed back // out as the output from the scanner. // // Notes: May be called several times in a loop before Accept() is // called. // // History: 96-Feb-13 DwightKr Created // //---------------------------------------------------------------------------- BOOL CTokenizeString::GetGUID( GUID & guid ) { if ( IsEndOfTextToken() || !iswdigit(*_wcsCurrentToken) ) return FALSE; // 0123456789 123456789 123456789 123456 // A guid MUST have the syntax XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX // // // Don't use wsscanf. We're scanning into *bytes*, but wsscanf assumes // result locations are *dwords*. Thus a write to the last few bytes of // the guid writes over other memory! // WCHAR wcsGuid[37]; RtlZeroMemory( wcsGuid, sizeof(wcsGuid) ); wcsncpy( wcsGuid, _wcsCurrentToken, 36 ); if ( wcsGuid[8] != L'-' ) return FALSE; wcsGuid[8] = 0; WCHAR * pwcStart = &wcsGuid[0]; WCHAR * pwcEnd; guid.Data1 = wcstoul( pwcStart, &pwcEnd, 16 ); if ( pwcEnd < &wcsGuid[8] ) // Non-digit found before wcsGuid[8] return FALSE; if ( wcsGuid[13] != L'-' ) return FALSE; wcsGuid[13] = 0; pwcStart = &wcsGuid[9]; guid.Data2 = (USHORT)wcstoul( pwcStart, &pwcEnd, 16 ); if ( pwcEnd < &wcsGuid[13] ) return FALSE; if ( wcsGuid[18] != L'-' ) return FALSE; wcsGuid[18] = 0; pwcStart = &wcsGuid[14]; guid.Data3 = (USHORT)wcstoul( pwcStart, &pwcEnd, 16 ); if ( pwcEnd < &wcsGuid[18] ) return FALSE; WCHAR wc = wcsGuid[21]; wcsGuid[21] = 0; pwcStart = &wcsGuid[19]; guid.Data4[0] = (unsigned char)wcstoul( pwcStart, &pwcEnd, 16 ); if ( pwcEnd < &wcsGuid[21] ) return FALSE; wcsGuid[21] = wc; if ( wcsGuid[23] != L'-' ) return FALSE; wcsGuid[23] = 0; pwcStart = &wcsGuid[21]; guid.Data4[1] = (unsigned char)wcstoul( pwcStart, &pwcEnd, 16 ); if ( pwcEnd < &wcsGuid[23] ) return FALSE; for ( unsigned i = 0; i < 6; i++ ) { wc = wcsGuid[26+i*2]; wcsGuid[26+i*2] = 0; pwcStart = &wcsGuid[24+i*2]; guid.Data4[2+i] = (unsigned char)wcstoul( pwcStart, &pwcEnd, 16 ); if ( pwcEnd < &wcsGuid[26+i*2] ) return FALSE; wcsGuid[26+i*2] = wc; } _wcsCurrentToken += 36; _wcsNextToken = _wcsCurrentToken; EatWhiteSpace(); return TRUE; } //+--------------------------------------------------------------------------- // // Member: CTokenizeString::AcqPhrase, public // // Synopsis: gets all characters up to end-of-line or next quote // // History: 96-Feb-13 DwightKr Created // //---------------------------------------------------------------------------- WCHAR * CTokenizeString::AcqPhrase() { // // Find the closing " // WCHAR const * wcsClosingQuote = _wcsCurrentToken; do { if ( 0 == *wcsClosingQuote ) break; if ( L'"' == *wcsClosingQuote ) { if ( L'"' == *(wcsClosingQuote+1) ) wcsClosingQuote++; else break; } wcsClosingQuote++; } while ( TRUE ); // // We've found the closing quote. Build a buffer big enough to // contain the string. // ULONG cwcToken = (ULONG)(wcsClosingQuote - _wcsCurrentToken + 1); XArray wcsToken( cwcToken ); // // copy the string, but remove the extra quote characters // WCHAR * pwcNewBuf = wcsToken.GetPointer(); WCHAR const * pStart = _wcsCurrentToken; while ( pStart < wcsClosingQuote ) { *pwcNewBuf++ = *pStart++; if ( L'"' == *pStart ) pStart++; } *pwcNewBuf = 0; _wcsCurrentToken += cwcToken - 1; _wcsNextToken = _wcsCurrentToken; EatWhiteSpace(); return wcsToken.Acquire(); } //+--------------------------------------------------------------------------- // // Member: CTokenizeString::AcqVector, public // // Synopsis: Gets each of the vector elements upto the next } // // History: 96-Feb-13 DwightKr Created // //---------------------------------------------------------------------------- void CTokenizeString::AcqVector( PROPVARIANT & propVariant ) { // // Determine the VT type of this vector. // GUID guid; _int64 i64Value; double dblValue; if ( GetGUID( guid ) ) { propVariant.vt = VT_CLSID | VT_VECTOR; propVariant.cauuid.cElems = 0; CDynArrayInPlace pElems; do { Accept(); pElems.Add( guid, propVariant.cauuid.cElems ); propVariant.cauuid.cElems++; if ( LookAhead() == COMMA_TOKEN ) { Accept(); } } while ( GetGUID( guid ) ); propVariant.cauuid.pElems = pElems.Acquire(); } else if ( GetNumber( i64Value ) ) { propVariant.vt = VT_I8 | VT_VECTOR; propVariant.cah.cElems = 0; CDynArrayInPlace<_int64> pElems; do { Accept(); pElems.Add( i64Value, propVariant.cah.cElems ); propVariant.cah.cElems++; if ( LookAhead() == COMMA_TOKEN ) { Accept(); } } while ( GetNumber( i64Value ) ); propVariant.cah.pElems = (LARGE_INTEGER *) pElems.Acquire(); } else if ( GetNumber( dblValue ) ) { propVariant.vt = VT_R8 | VT_VECTOR; propVariant.cadbl.cElems = 0; CDynArrayInPlace pElems; do { Accept(); pElems.Add( dblValue, propVariant.cadbl.cElems ); propVariant.cadbl.cElems++; if ( LookAhead() == COMMA_TOKEN ) { Accept(); } } while ( GetNumber( dblValue ) ); propVariant.cadbl.pElems = pElems.Acquire(); } else { propVariant.vt = VT_LPWSTR | VT_VECTOR; CDynArrayInPlace pElems; propVariant.calpwstr.cElems = 0; while ( (LookAhead() != C_CLOSE_TOKEN) && (LookAhead() != EOS_TOKEN) ) { // // If its a quoted string, get everything between the quotes. // if ( LookAhead() == QUOTES_TOKEN ) { Accept(); // Skip over the quote pElems.Add(AcqPhrase(), propVariant.calpwstr.cElems ); Accept(); // Skip over the string if ( LookAhead() != QUOTES_TOKEN ) { THROW( CHTXException(MSG_CI_HTX_MISSING_QUOTE, 0, 0) ); } Accept(); // Skip over the quote } else { // // Get the next word // pElems.Add( AcqWord(), propVariant.calpwstr.cElems ); Accept(); // Skip over the string } propVariant.calpwstr.cElems++; if ( LookAhead() == COMMA_TOKEN ) { Accept(); } } propVariant.calpwstr.pElems = pElems.Acquire(); } }