windows-nt/Source/XPSP1/NT/base/win32/fusion/xmlparser/xmlstream.hxx

303 lines
9.7 KiB
C++
Raw Permalink Normal View History

2020-09-26 03:20:57 -05:00
/*
*
* Copyright (c) 1998,1999 Microsoft Corporation. All rights reserved.
* EXEMPT: copyright change only, no build required
*
*/
#ifndef _XMLSTREAM_HXX
#define _XMLSTREAM_HXX
#pragma once
#include "bufferedstream.hxx"
#include "encodingstream.hxx"
#include "_rawstack.hxx"
class XMLParser;
// the XMLStream class uses the error code and token types defined in the xmlparser
#include <ole2.h>
#include <xmlparser.h>
//==============================================================================
// This enum and StateEntry struct are used in table driven parsing for DTD
// stuff - so that the parser isn't bloated by this stuff. This is about 15%
// slower than a hand written parser.
typedef enum {
OP_OWS, // optional whitespace
OP_WS, // required whitespace
OP_CHAR, // char comparison, _pch[0] is char, _sArg1 is else goto state or error code
OP_CHAR2, // same os OP_CHAR - except it doesn't do _pInput->Mark.
OP_PEEK, // same as OP_CHAR - except it doesn't advance.
OP_NAME, // scan name
OP_TOKEN, // return token, _sArg1 = token
OP_STRING, // scan a string
OP_EXTID, // scan an external id.
OP_STRCMP, // string comparison.
OP_POP, // pop state
OP_NWS, // not whitespace conditional
OP_SUBSET, // skip an internal subset
OP_PUBIDOPTION, // conditional for _fShortPubIdOption
OP_NMTOKEN,
OP_TABLE, // push a new table. (pointer in _pch field).
OP_STABLE, // switch to new table. (pointer in _pch field).
OP_COMMENT,
OP_CONDSECT,
OP_SNCHAR, // conditional 'is start name char'
OP_EQUALS, // scan ' = '
OP_ENCODING, // switch encoding.
OP_CHARWS, // match char or must be white space.
OP_ATTRVAL, //parse attribute values.(_sArg1 = return PCDATA token or not)
OP_PETEST,
OP_ATTEXPAND,
OP_NMSTRING, // unqualified name within quote
OP_FAKESYSTEM,
} OPCODE;
typedef struct {
OPCODE _sOp;
const WCHAR* _pch;
DWORD _sGoto;
DWORD _sArg1;
long _lDelta; // for when we do a Mark(), or Token if OP_CHAR
} StateEntry;
//================================================================================
class XMLStream
{
public:
XMLStream(XMLParser * pXMLParser);
~XMLStream();
//------------------------------------------------------------------------
// These are some more tokens that the XMLStream returns.
// xiaoyu : only few are used in fusion manifest
typedef enum
{
// ADDITIONAL TOKENS THAT THE PARSER PULLS UP
XML_PENDING = 0, // still parsing.
XML_NUMENTITYREF = XML_LASTSUBNODETYPE, // &23;
XML_HEXENTITYREF, // &x0cf7;
XML_BUILTINENTITYREF, //&gt;
XML_TAGEND, // >
XML_EMPTYTAGEND, // /> (text = tag name)
XML_ENDTAG, // </ (text = tag name)
XML_ENDPI, // text = pi body minus '?>'
XML_ENDXMLDECL, // end of xml declaration
XML_ENDDECL, // '>'
XML_CLOSEPAREN,
XML_ENDCONDSECT, // ']]>'
XML_STARTDTDSUBSET,
XML_ENDPROLOG,
XML_DATAAVAILABLE,
XML_DATAREALLOCATE,
} XMLToken;
HRESULT PushStream(
/* [in] */ EncodingStream *pStm,
/* [in] */ bool fExternalPE);
HRESULT AppendData(
/* [in] */ const BYTE *buffer,
/* [in] */ long length,
/* [in] */ BOOL lastBuffer);
HRESULT Reset( void);
HRESULT GetNextToken(
/* [out] */ DWORD *token,
/* [out] */ const WCHAR **text,
/* [out] */ long *length,
/* [out] */ long *nslen);
ULONG GetLine();
ULONG GetLinePosition();
ULONG GetInputPosition();
HRESULT GetLineBuffer(
/* [out] */ const WCHAR * *buf,
/* [out] */ ULONG* len,
/* [out] */ ULONG* startpos);
void SetFlags(
/* [in] */ unsigned short usFlags);
unsigned short GetFlags();
HRESULT ErrorCallback(HRESULT hr);
WCHAR getAttrValueQuoteChar() { return _chTerminator; }
private:
HRESULT init();
void _init();
HRESULT firstAdvance();
HRESULT parseContent();
HRESULT parseElement();
HRESULT parseEndTag();
HRESULT parsePI();
HRESULT parseComment();
HRESULT parseName();
HRESULT parseAttributes();
HRESULT parseAttrValue();
HRESULT parsePCData();
HRESULT parseEntityRef();
HRESULT parseCondSect();
HRESULT parseCData();
HRESULT parseTable();
HRESULT parseEquals();
HRESULT skipWhiteSpace();
inline void mark(long back = 0) { _pInput->Mark(back); }
typedef HRESULT (XMLStream::* StateFunc)();
// The state machine consists of functions where each
// function can determine for itself its own substates
// so that when it is reactivated by a pop() it can pick
// up where it left off. The current substate is set
// to zero on a push() and at pop() time it is restored
// to whatever it was told to be in the push().
HRESULT push(StateFunc f, short substate = 0);
HRESULT pushTable(short substate = 0, const StateEntry* table = NULL, DWORD le = 0);
HRESULT pop(bool boundary = true);
HRESULT switchTo(StateFunc f); // pop & push
// Advance and jump to state
HRESULT AdvanceTo(short substate);
HRESULT PopStream();
HRESULT ScanHexDigits();
HRESULT ScanDecimalDigits();
bool PreEntityText();
// Always use this function instead of calling _pInput->getToken
inline void getToken(const WCHAR** ppText, long* pLen) { _pInput->getToken(ppText,pLen); }
BufferedStream* getCurrentStream();
StateFunc _fnState; // current function.
short _sSubState; // current substate.
short _sSavedState;
struct StateInfo
{
StateFunc _fnState;
short _sSubState;
const StateEntry* _pTable;
//DWORD _lEOFError;
int _cStreamDepth;
};
_rawstack<StateInfo> _pStack;
struct InputInfo
{
BufferedStream* _pInput;
WCHAR _chLookahead;
//bool _fPE;
//bool _fExternalPE;
//bool _fInternalSubset; // remember that we were in internal subset.
StateFunc _fnState; // remember the state function when pushstream
// it is used to check parameter entity replacement text
// is properly nested with markup declarations.
};
_rawstack<InputInfo> _pStreams;
// Cache the current value of _pStreams.used() which is used to making sure
// a parameter entity doesn't pop out of the scope in which it was entered.
int _cStreamDepth;
BufferedStream* _pInput; // current input stream.
WCHAR _chNextLookahead;
bool _fWasUsingBuffer;
long _lParseStringLevel;
DWORD _nPreToken;
DWORD _nToken;
long _lLengthDelta; // amount to adjust token length by
long _lMarkDelta; // amount to adjust mark position by
bool _fDelayMark;
bool _fFoundFirstElement; // special trick for EndProlog.
WCHAR _chLookahead;
bool _fWhitespace; // found whitespace while parsing PCDATA
WCHAR _chTerminator;
WCHAR _chEndChar; // for parseAttributes.
bool _fEOF; // reached end of file.
long _lNslen; // namespace length
long _lNssep; // namespace separator length ':' or '::'.
long _lEntityPos; // for parsing entity references.
bool _fPCDataPending; // whether pcdata is pending during parseEntityRef.
const WCHAR* _pchCDataState;
int _cAttrCount;
int _nEntityNSLen; // saved namespace info for entity references.
// Switches.
unsigned short _usFlags;
// bool _fFloatingAmp; // used in ParseEntityRef()
bool _fShortEndTags; // used in ParserEndTag()
bool _fCaseInsensitive;
bool _fNoNamespaces; // used in parseName()
//bool _fNoWhitespaceNodes; // used in DTD data
//bool _fIE4Quirks; // xiaoyu : what it means?
bool _fNoDTDNodes; // only used in GetDTDNextToken(). may be deleted later
//bool _fHandlePE; // This flag is used to turn on and off parameter entity handling in DTD
// xiaoyu: used in ParsePI(), ParseDTD(), parseComment(),
// parsePEDecl(), parseIgnoSet()
// for table driven parsing.
const StateEntry* _pTable;
//DWORD _lEOFError; // used in parsePEDecl(), pushTable(), parseTable(),
// buffer used during whitespace normalization
WCHAR* _pchBuffer;
long _lBufLen;
long _lBufSize;
bool _fFoundWhitespace;
bool _fUsingBuffer;
bool _fFoundNonWhitespace;
bool _fCheckAttribute; // need to check the attribute name
// xiaoyu : used for complicate attribute type, such as "xml:lang", "xmlns"
bool _fDTD; // xiaoyu whether xml contains DTD
//bool _fInternalSubset; // xiaoyu used in ParseDTD
//int _cConditionalSection;
//bool _fFoundPEREf;
//bool _fWasDTD;
// bool _fParsingNames;
bool _fParsingAttDef; // used in ParseAttrValue()
//int _cIgnoreSectLevel;
//bool _fResolved; // used in ParseEntity();
bool _fReturnAttributeValue;
//int _cStreams; // used to identify if PushStream was called.
// used in parseEntity();
WCHAR _wcEntityValue; // used in parseEntityRef()
XMLParser * _pXMLParser; // regular pointer pointing back to the parser
inline HRESULT PushChar(WCHAR ch)
{
if (_lBufLen < _lBufSize)
{
_pchBuffer[_lBufLen++] = ch; return S_OK;
}
else return _PushChar(ch);
}
HRESULT _PushChar(WCHAR ch); // grow the buffer.
};
#endif // _XML_STREAM_HXX