669 lines
9.2 KiB
C++
669 lines
9.2 KiB
C++
/*++
|
|
|
|
Copyright (c) 1995 Microsoft Corporation
|
|
|
|
Module Name :
|
|
|
|
parse.hxx
|
|
|
|
Abstract:
|
|
|
|
    Simple parser class for extracting HTTP header information
|
|
|
|
Author:
|
|
John Ludeman (JohnL) 18-Jan-1995
|
|
|
|
Project:
|
|
HTTP server
|
|
|
|
Revision History:
|
|
|
|
--*/
|
|
|
|
#include <tcpdllp.hxx>
|
|
# include <parse.hxx>
|
|
|
|
|
|
INET_PARSER::INET_PARSER(
    CHAR * pszStart
    )
/*++

Routine Description:

    Starts a parse at the beginning of the supplied character buffer:
    leading white space is skipped and the first token is zero
    terminated.

Arguments:

    pszStart - Start of the character buffer to parse.  Must not be NULL.
               The buffer is written to while parsing (terminators are
               placed directly in it).

Return Value:

    None

--*/
    : m_fListMode   ( FALSE ),
      m_pszPos      ( pszStart ),
      m_pszTokenTerm( NULL ),
      m_pszLineTerm ( NULL )
{
    DBG_ASSERT( pszStart );

    //
    //  Move onto the first non-white character of the buffer, then drop
    //  a zero terminator at the end of the token that begins there.
    //

    EatWhite();
    TerminateToken();
}
|
|
|
|
|
|
INET_PARSER::~INET_PARSER(
    VOID
    )
/*++

Routine Description:

    Puts the buffer back the way it was handed to us by undoing whatever
    modifications were made to it while parsing.

Arguments:

    None

--*/
{
    //
    //  RestoreBuffer is declared outside this file; presumably it removes
    //  both the token and the line terminators.
    //

    RestoreBuffer();
}
|
|
|
|
|
|
CHAR *
INET_PARSER::QueryPos(
    VOID
    )
/*++

Routine Description:

    Removes the token and line terminators (if present) and reports the
    current parse position.

Arguments:

    None

Return Value:

    Pointer to the current parse position.  Because both terminators
    have been removed, the string is not cut off at the end of the
    current token or line.

--*/
{
    //
    //  Undo the token terminator first, then the line terminator
    //

    RestoreToken();
    RestoreLine();

    CHAR * const pszCurrent = m_pszPos;

    return pszCurrent;
}
|
|
|
|
VOID
INET_PARSER::SetPtr(
    CHAR * pch
    )
/*++

Routine Description:

    Repositions the parser.  Any token or line terminator currently in
    the buffer is removed before the position is changed.

Arguments:

    pch - Location the parser should continue parsing from

Return Value:

    None

--*/
{
    //
    //  The terminators belong to the old position, so take them out
    //  before moving.
    //

    RestoreToken();
    RestoreLine();

    m_pszPos = pch;
}
|
|
|
|
|
|
CHAR *
INET_PARSER::QueryToken(
    VOID
    )
/*++

Routine Description:

    Returns the token at the current parse position, zero terminating it
    first if that has not been done yet.

    When list mode is on, ',' is passed to TerminateToken as an extra
    delimiter.

Arguments:

    None

Return Value:

    Pointer to the current position, which is the start of the zero
    terminated token (an empty string if no token characters precede
    the next delimiter).

--*/
{
    if ( m_pszTokenTerm == NULL )
    {
        //
        //  No terminator in place yet - choose the extra delimiter based
        //  on the parser mode.
        //

        const CHAR chDelim = m_fListMode ? ',' : '\0';

        TerminateToken( chDelim );
    }

    return m_pszPos;
}
|
|
|
|
|
|
CHAR *
INET_PARSER::QueryLine(
    VOID
    )
/*++

Routine Description:

    Returns the rest of the current line as a zero terminated string.
    The token terminator is removed so the whole line is visible, and
    the line is terminated if it is not already.

Arguments:

    None

Return Value:

    Pointer to the current position, which is the start of the zero
    terminated line.

--*/
{
    //
    //  A token terminator would cut the line short
    //

    RestoreToken();

    if ( m_pszLineTerm == NULL )
    {
        TerminateLine();
    }

    return m_pszPos;
}
|
|
|
|
|
|
BOOL
INET_PARSER::CopyToken(
    STR * pStr,
    BOOL  fAdvanceToken
    )
/*++

Routine Description:

    Copies the token at the current parse position into *pStr and
    optionally moves on to the next token.

Arguments:

    pStr          - Receives the token.  Must not be NULL.
    fAdvanceToken - TRUE to advance to the next token after copying.
                    The advance happens whether or not the copy worked.

Return Value:

    The value returned by pStr->Copy

--*/
{
    DBG_ASSERT( pStr );

    //
    //  Make sure the token is zero terminated before it is copied
    //

    if ( m_pszTokenTerm == NULL )
    {
        TerminateToken();
    }

    const BOOL fCopied = pStr->Copy( m_pszPos );

    if ( fAdvanceToken )
    {
        NextToken();
    }

    return fCopied;
}
|
|
|
|
|
|
BOOL
INET_PARSER::CopyToEOL(
    STR * pstr,
    BOOL  fAdvance
    )
/*++

Routine Description:

    Copies the text from the current parse position to the end of the
    current line into *pstr and optionally moves on to the next line.

Arguments:

    pstr     - Receives the text.  Must not be NULL.
    fAdvance - TRUE to advance to the next line after copying.  The
               advance happens whether or not the copy worked.

Return Value:

    The value returned by pstr->Copy

--*/
{
    BOOL fRet;

    //
    //  Same argument check CopyToken makes
    //

    DBG_ASSERT( pstr );

    //
    //  Remove the token terminator so the whole line is copied, and make
    //  sure the line itself is zero terminated.
    //

    RestoreToken();

    if ( !m_pszLineTerm )
    {
        TerminateLine();
    }

    fRet = pstr->Copy( m_pszPos );

    if ( fAdvance )
    {
        NextLine();
    }

    return fRet;
}
|
|
|
|
BOOL
INET_PARSER::AppendToEOL(
    STR * pstr,
    BOOL  fAdvance
    )
/*++

Routine Description:

    Same as CopyToEOL except that the text from the current parse
    position to the end of the current line is appended to *pstr rather
    than replacing its contents.

Arguments:

    pstr     - String the text is appended to.  Must not be NULL.
    fAdvance - TRUE to advance to the next line after appending.  The
               advance happens whether or not the append worked.

Return Value:

    The value returned by pstr->Append

--*/
{
    BOOL fRet;

    //
    //  Same argument check CopyToken makes
    //

    DBG_ASSERT( pstr );

    //
    //  Remove the token terminator so the whole line is appended, and
    //  make sure the line itself is zero terminated.
    //

    RestoreToken();

    if ( !m_pszLineTerm )
    {
        TerminateLine();
    }

    fRet = pstr->Append( m_pszPos );

    if ( fAdvance )
    {
        NextLine();
    }

    return fRet;
}
|
|
|
|
|
|
CHAR *
INET_PARSER::NextLine(
    VOID
    )
/*++

Routine Description:

    Moves the parse position past the '\n' that ends the current line
    (or leaves it on the terminating '\0' if there is no further '\n')
    and then skips white space from there.

    Both the token and the line terminators are removed first.

Arguments:

    None

Return Value:

    The value returned by EatWhite

--*/
{
    RestoreToken();
    RestoreLine();

    //
    //  Find the end of this line and step over the '\n', taking care not
    //  to walk past the end of the buffer.
    //

    CHAR * pszNext = AuxSkipTo( '\n' );

    if ( *pszNext != '\0' )
    {
        pszNext++;
    }

    m_pszPos = pszNext;

    return EatWhite();
}
|
|
|
|
CHAR *
INET_PARSER::NextToken(
    VOID
    )
/*++

Routine Description:

    Advances the parse position over the current token and the white
    space that follows it, then zero terminates the token found there.

Arguments:

    None

Return Value:

    Pointer to the new parse position.  Because the line is kept zero
    terminated, this is an empty string once the last token on the line
    has been passed.

--*/
{
    RestoreToken();

    //
    //  The line terminator is what stops the scan at the end of the
    //  line, so make sure it is in place.
    //

    if ( m_pszLineTerm == NULL )
    {
        TerminateLine();
    }

    //
    //  Step over the current token and any white space behind it
    //

    EatNonWhite();
    EatWhite();

    TerminateToken();

    return m_pszPos;
}
|
|
|
|
|
|
CHAR *
INET_PARSER::NextToken(
    CHAR ch
    )
/*++

Routine Description:

    Advances the parse position to the token that follows the next
    occurrence of ch on the current line, and zero terminates that
    token.  The scan does not go past the end of the line.

Arguments:

    ch - Character to skip to (generally ',' or ';').  It is also passed
         to TerminateToken as an extra delimiter for the new token.

Return Value:

    Pointer to the new parse position.  This is an empty string if the
    end of the line was reached.

--*/
{
    RestoreToken();

    //
    //  The line terminator is what stops the scan at the end of the
    //  line, so make sure it is in place.
    //

    if ( m_pszLineTerm == NULL )
    {
        TerminateLine();
    }

    //
    //  Find the delimiter and, unless we ran into the end of the line,
    //  step over it.
    //

    SkipTo( ch );

    if ( *m_pszPos != '\0' )
    {
        m_pszPos++;
    }

    EatWhite();

    TerminateToken( ch );

    return m_pszPos;
}
|
|
|
|
|
|
CHAR *
INET_PARSER::SkipTo(
    CHAR ch
    )
/*++

Routine Description:

    Moves the parse position forward to the next occurrence of ch on the
    current line, or to the end of the line if ch does not occur.

Arguments:

    ch - Character to stop at

Return Value:

    Pointer to the new parse position

--*/
{
    RestoreToken();

    //
    //  The line terminator is what stops the scan at the end of the
    //  line, so make sure it is in place.
    //

    if ( m_pszLineTerm == NULL )
    {
        TerminateLine();
    }

    CHAR * const pszFound = AuxSkipTo( ch );

    m_pszPos = pszFound;

    return pszFound;
}
|
|
|
|
|
|
VOID
INET_PARSER::SetListMode(
    BOOL fListMode
    )
/*++

Routine Description:

    Switches the parser between list mode and non-list mode.  The token
    terminator is removed and the line is left zero terminated.

Arguments:

    fListMode - TRUE to turn list mode on, FALSE to turn it off

Return Value:

    None

--*/
{
    //
    //  The existing token terminator was placed under the old mode
    //

    RestoreToken();

    if ( m_pszLineTerm == NULL )
    {
        TerminateLine();
    }

    m_fListMode = fListMode;
}
|
|
|
|
VOID
INET_PARSER::TerminateToken(
    CHAR ch
    )
/*++

Routine Description:

    Zero terminates the token at the current parse position.  The
    character that gets overwritten is saved so RestoreToken can put it
    back.  No token terminator may already be in place.

Arguments:

    ch - Additional character that ends the token (handed to
         AuxEatNonWhite)

Return Value:

    None

--*/
{
    DBG_ASSERT( !m_pszTokenTerm );

    //
    //  Locate the end of the token, remember what is there and replace
    //  it with the terminator.
    //

    CHAR * const pszTokenEnd = AuxEatNonWhite( ch );

    m_chTokenTerm  = *pszTokenEnd;
    *pszTokenEnd   = '\0';
    m_pszTokenTerm = pszTokenEnd;
}
|
|
|
|
VOID
INET_PARSER::RestoreToken(
    VOID
    )
/*++

Routine Description:

    Puts back the character that TerminateToken replaced with a zero
    terminator.  Does nothing if no token terminator is in place.

Arguments:

    None

Return Value:

    None

--*/
{
    if ( m_pszTokenTerm == NULL )
    {
        return;
    }

    *m_pszTokenTerm = m_chTokenTerm;
    m_pszTokenTerm  = NULL;
}
|
|
|
|
VOID
INET_PARSER::TerminateLine(
    VOID
    )
/*++

Routine Description:

    Zero terminates the current line immediately after its last
    non-white character (trailing white space is trimmed).  The
    character that gets overwritten is saved so RestoreLine can put it
    back.  No line terminator may already be in place.

    The terminator is never placed in front of the current parse
    position: if everything from the current position to the end of the
    line is white space, the line is terminated at the current position
    and reads as an empty string.

Arguments:

    None

Return Value:

    None

--*/
{
    DBG_ASSERT( !m_pszLineTerm );

    //
    //  Find the end of the line: either the '\n' or the '\0' that ends
    //  the buffer.
    //

    CHAR * pszEnd = AuxSkipTo( '\n' );

    //
    //  Now trim any trailing white space on the line.  pszEnd always
    //  points one past the last character kept, and the scan stops at
    //  m_pszPos so nothing in front of the parse position is ever read
    //  or overwritten.
    //

    while ( pszEnd > m_pszPos &&
            ISWHITEA( pszEnd[-1] ))
    {
        pszEnd--;
    }

    m_pszLineTerm = pszEnd;
    m_chLineTerm  = *pszEnd;

    *pszEnd = '\0';
}
|
|
|
|
VOID
INET_PARSER::RestoreLine(
    VOID
    )
/*++

Routine Description:

    Puts back the character that TerminateLine replaced with a zero
    terminator.  Does nothing if no line terminator is in place.

Arguments:

    None

Return Value:

    None

--*/
{
    if ( m_pszLineTerm == NULL )
    {
        return;
    }

    *m_pszLineTerm = m_chLineTerm;
    m_pszLineTerm  = NULL;
}
|
|
|
|
|
|
|
|
|
|
CHAR *
INET_PARSER::AuxEatNonWhite(
    CHAR ch
    )
/*++

Routine Description:

    Finds the end of the token that starts at the current parse
    position.  The parse position itself is not changed.

    In non list mode the token ends at the first white space character.
    In list mode the token ends at the first ',' or ';'.  In both modes
    it also ends at '\n', at the terminating '\0' and at ch.

Arguments:

    ch - Optional character that also ends the token (such as ',' or ';'
         when doing list processing)

Return Value:

    Pointer to the first character that is not part of the token

--*/
{
    CHAR * pszScan = m_pszPos;

    //
    //  End of string, end of line and the caller's character stop the
    //  scan in either mode.
    //

    for ( ; *pszScan != '\0' && *pszScan != '\n' && *pszScan != ch; pszScan++ )
    {
        if ( m_fListMode )
        {
            //
            //  List mode: comma and semi-colon are the delimiters.  White
            //  space is deliberately not treated as one here (that check
            //  was disabled by fix #20931).
            //

            if ( *pszScan == ',' || *pszScan == ';' )
            {
                break;
            }
        }
        else if ( ISWHITEA( *pszScan ))
        {
            //
            //  Non list mode: white space ends the token.  Note that
            //  ISWHITEA includes '\r'.
            //

            break;
        }
    }

    return pszScan;
}
|
|
|
|
|
|
CHAR *
INET_PARSER::AuxEatWhite(
    VOID
    )
/*++

Routine Description:

    Finds the first character at or after the current parse position
    that is not white space.  The scan also stops at '\n' and at the
    terminating '\0'.  The parse position itself is not changed.

Arguments:

    None

Return Value:

    Pointer to the character the scan stopped on

--*/
{
    CHAR * pszScan;

    //
    //  Note that ISWHITEA includes '\r'
    //

    for ( pszScan = m_pszPos; *pszScan != '\0'; pszScan++ )
    {
        if ( *pszScan == '\n' || !ISWHITEA( *pszScan ))
        {
            break;
        }
    }

    return pszScan;
}
|
|
|
|
|
|
CHAR *
INET_PARSER::AuxSkipTo(
    CHAR ch
    )
/*++

Routine Description:

    Finds the next occurrence of ch at or after the current parse
    position.  The scan also stops at '\n' and at the terminating '\0'.
    The parse position itself is not changed.

Arguments:

    ch - Character to look for

Return Value:

    Pointer to the character the scan stopped on: ch, '\n' or '\0'

--*/
{
    CHAR * pszScan;

    for ( pszScan = m_pszPos; *pszScan != '\0'; pszScan++ )
    {
        if ( *pszScan == '\n' || *pszScan == ch )
        {
            break;
        }
    }

    return pszScan;
}
|