/*
 *
 * Copyright (c) 1998,1999 Microsoft Corporation. All rights reserved.
 * EXEMPT: copyright change only, no build required
 *
 */
#ifndef _XMLSTREAM_HXX
#define _XMLSTREAM_HXX
#pragma once

#include "bufferedstream.hxx"
#include "encodingstream.hxx"
#include "_rawstack.hxx"

// Forward declaration only: XMLStream keeps a plain pointer back to the parser.
class XMLParser;

// the XMLStream class uses the error code and token types defined in the xmlparser
#include <ole2.h>
#include <xmlparser.h>

//==============================================================================
// This enum and StateEntry struct are used in table driven parsing for DTD
// stuff - so that the parser isn't bloated by this stuff.  This is about 15%
// slower than a hand written parser.
//
// Each OPCODE value selects the operation performed for one StateEntry row.
// The numeric values are implicit (OP_OWS == 0, increasing by one), so the
// order of the enumerators must not be changed.

typedef enum {
    OP_OWS,         // optional whitespace
    OP_WS,          // required whitespace
    OP_CHAR,        // char comparison, _pch[0] is char, _sArg1 is else goto state or error code
    OP_CHAR2,       // same as OP_CHAR - except it doesn't do _pInput->Mark.
    OP_PEEK,        // same as OP_CHAR - except it doesn't advance.
    OP_NAME,        // scan name
    OP_TOKEN,       // return token, _sArg1 = token
    OP_STRING,      // scan a string
    OP_EXTID,       // scan an external id.
    OP_STRCMP,      // string comparison.
    OP_POP,         // pop state
    OP_NWS,         // not whitespace conditional
    OP_SUBSET,      // skip an internal subset
    OP_PUBIDOPTION, // conditional for _fShortPubIdOption
    OP_NMTOKEN,     // presumably: scan an NMTOKEN - TODO confirm against the table interpreter
    OP_TABLE,       // push a new table. (pointer in _pch field).
    OP_STABLE,      // switch to new table. (pointer in _pch field).
    OP_COMMENT,     // presumably: scan a comment - TODO confirm
    OP_CONDSECT,    // presumably: scan a conditional section - TODO confirm
    OP_SNCHAR,      // conditional 'is start name char'
    OP_EQUALS,      // scan ' = '
    OP_ENCODING,    // switch encoding.
    OP_CHARWS,      // match char or must be white space.
    OP_ATTRVAL,     // parse attribute values. (_sArg1 = return PCDATA token or not)
    OP_PETEST,      // NOTE(review): undocumented here - see the table interpreter
    OP_ATTEXPAND,   // NOTE(review): undocumented here - see the table interpreter
    OP_NMSTRING,    // unqualified name within quote
    OP_FAKESYSTEM,  // NOTE(review): undocumented here - see the table interpreter
} OPCODE;

typedef struct {
|
|
OPCODE _sOp;
|
|
const WCHAR* _pch;
|
|
DWORD _sGoto;
|
|
DWORD _sArg1;
|
|
long _lDelta; // for when we do a Mark(), or Token if OP_CHAR
|
|
} StateEntry;
|
|
|
|
//================================================================================
|
|
class XMLStream
|
|
{
|
|
public:
|
|
XMLStream(XMLParser * pXMLParser);
|
|
~XMLStream();
|
|
|
|
//------------------------------------------------------------------------
|
|
// These are some more tokens that the XMLStream returns.
|
|
// xiaoyu : only few are used in fusion manifest
|
|
typedef enum
|
|
{
|
|
// ADDITIONAL TOKENS THAT THE PARSER PULLS UP
|
|
XML_PENDING = 0, // still parsing.
|
|
XML_NUMENTITYREF = XML_LASTSUBNODETYPE, // &23;
|
|
XML_HEXENTITYREF, // &x0cf7;
|
|
XML_BUILTINENTITYREF, //>
|
|
XML_TAGEND, // >
|
|
XML_EMPTYTAGEND, // /> (text = tag name)
|
|
XML_ENDTAG, // </ (text = tag name)
|
|
XML_ENDPI, // text = pi body minus '?>'
|
|
XML_ENDXMLDECL, // end of xml declaration
|
|
XML_ENDDECL, // '>'
|
|
XML_CLOSEPAREN,
|
|
XML_ENDCONDSECT, // ']]>'
|
|
XML_STARTDTDSUBSET,
|
|
XML_ENDPROLOG,
|
|
XML_DATAAVAILABLE,
|
|
XML_DATAREALLOCATE,
|
|
} XMLToken;
|
|
|
|
HRESULT PushStream(
|
|
/* [in] */ EncodingStream *pStm,
|
|
/* [in] */ bool fExternalPE);
|
|
|
|
HRESULT AppendData(
|
|
/* [in] */ const BYTE *buffer,
|
|
/* [in] */ long length,
|
|
/* [in] */ BOOL lastBuffer);
|
|
|
|
HRESULT Reset( void);
|
|
|
|
HRESULT GetNextToken(
|
|
/* [out] */ DWORD *token,
|
|
/* [out] */ const WCHAR **text,
|
|
/* [out] */ long *length,
|
|
/* [out] */ long *nslen);
|
|
|
|
ULONG GetLine();
|
|
|
|
ULONG GetLinePosition();
|
|
|
|
ULONG GetInputPosition();
|
|
|
|
HRESULT GetLineBuffer(
|
|
/* [out] */ const WCHAR * *buf,
|
|
/* [out] */ ULONG* len,
|
|
/* [out] */ ULONG* startpos);
|
|
|
|
void SetFlags(
|
|
/* [in] */ unsigned short usFlags);
|
|
|
|
unsigned short GetFlags();
|
|
|
|
HRESULT ErrorCallback(HRESULT hr);
|
|
|
|
WCHAR getAttrValueQuoteChar() { return _chTerminator; }
|
|
|
|
private:
|
|
HRESULT init();
|
|
void _init();
|
|
|
|
HRESULT firstAdvance();
|
|
HRESULT parseContent();
|
|
HRESULT parseElement();
|
|
HRESULT parseEndTag();
|
|
HRESULT parsePI();
|
|
HRESULT parseComment();
|
|
HRESULT parseName();
|
|
HRESULT parseAttributes();
|
|
HRESULT parseAttrValue();
|
|
|
|
HRESULT parsePCData();
|
|
HRESULT parseEntityRef();
|
|
|
|
HRESULT parseCondSect();
|
|
HRESULT parseCData();
|
|
|
|
HRESULT parseTable();
|
|
HRESULT parseEquals();
|
|
|
|
HRESULT skipWhiteSpace();
|
|
|
|
inline void mark(long back = 0) { _pInput->Mark(back); }
|
|
|
|
typedef HRESULT (XMLStream::* StateFunc)();
|
|
|
|
// The state machine consists of functions where each
|
|
// function can determine for itself its own substates
|
|
// so that when it is reactivated by a pop() it can pick
|
|
// up where it left off. The current substate is set
|
|
// to zero on a push() and at pop() time it is restored
|
|
// to whatever it was told to be in the push().
|
|
HRESULT push(StateFunc f, short substate = 0);
|
|
HRESULT pushTable(short substate = 0, const StateEntry* table = NULL, DWORD le = 0);
|
|
HRESULT pop(bool boundary = true);
|
|
HRESULT switchTo(StateFunc f); // pop & push
|
|
|
|
// Advance and jump to state
|
|
HRESULT AdvanceTo(short substate);
|
|
|
|
HRESULT PopStream();
|
|
|
|
HRESULT ScanHexDigits();
|
|
HRESULT ScanDecimalDigits();
|
|
|
|
bool PreEntityText();
|
|
|
|
// Always use this function instead of calling _pInput->getToken
|
|
inline void getToken(const WCHAR** ppText, long* pLen) { _pInput->getToken(ppText,pLen); }
|
|
|
|
BufferedStream* getCurrentStream();
|
|
|
|
StateFunc _fnState; // current function.
|
|
short _sSubState; // current substate.
|
|
short _sSavedState;
|
|
|
|
struct StateInfo
|
|
{
|
|
StateFunc _fnState;
|
|
short _sSubState;
|
|
const StateEntry* _pTable;
|
|
//DWORD _lEOFError;
|
|
int _cStreamDepth;
|
|
};
|
|
_rawstack<StateInfo> _pStack;
|
|
|
|
struct InputInfo
|
|
{
|
|
BufferedStream* _pInput;
|
|
WCHAR _chLookahead;
|
|
//bool _fPE;
|
|
//bool _fExternalPE;
|
|
//bool _fInternalSubset; // remember that we were in internal subset.
|
|
StateFunc _fnState; // remember the state function when pushstream
|
|
// it is used to check parameter entity replacement text
|
|
// is properly nested with markup declarations.
|
|
};
|
|
_rawstack<InputInfo> _pStreams;
|
|
|
|
// Cache the current value of _pStreams.used() which is used to making sure
|
|
// a parameter entity doesn't pop out of the scope in which it was entered.
|
|
int _cStreamDepth;
|
|
|
|
BufferedStream* _pInput; // current input stream.
|
|
|
|
WCHAR _chNextLookahead;
|
|
bool _fWasUsingBuffer;
|
|
long _lParseStringLevel;
|
|
|
|
DWORD _nPreToken;
|
|
DWORD _nToken;
|
|
long _lLengthDelta; // amount to adjust token length by
|
|
long _lMarkDelta; // amount to adjust mark position by
|
|
bool _fDelayMark;
|
|
bool _fFoundFirstElement; // special trick for EndProlog.
|
|
|
|
WCHAR _chLookahead;
|
|
bool _fWhitespace; // found whitespace while parsing PCDATA
|
|
WCHAR _chTerminator;
|
|
WCHAR _chEndChar; // for parseAttributes.
|
|
bool _fEOF; // reached end of file.
|
|
|
|
long _lNslen; // namespace length
|
|
long _lNssep; // namespace separator length ':' or '::'.
|
|
|
|
long _lEntityPos; // for parsing entity references.
|
|
bool _fPCDataPending; // whether pcdata is pending during parseEntityRef.
|
|
const WCHAR* _pchCDataState;
|
|
int _cAttrCount;
|
|
int _nEntityNSLen; // saved namespace info for entity references.
|
|
|
|
// Switches.
|
|
unsigned short _usFlags;
|
|
// bool _fFloatingAmp; // used in ParseEntityRef()
|
|
bool _fShortEndTags; // used in ParserEndTag()
|
|
bool _fCaseInsensitive;
|
|
bool _fNoNamespaces; // used in parseName()
|
|
//bool _fNoWhitespaceNodes; // used in DTD data
|
|
//bool _fIE4Quirks; // xiaoyu : what it means?
|
|
bool _fNoDTDNodes; // only used in GetDTDNextToken(). may be deleted later
|
|
//bool _fHandlePE; // This flag is used to turn on and off parameter entity handling in DTD
|
|
// xiaoyu: used in ParsePI(), ParseDTD(), parseComment(),
|
|
// parsePEDecl(), parseIgnoSet()
|
|
|
|
|
|
// for table driven parsing.
|
|
const StateEntry* _pTable;
|
|
//DWORD _lEOFError; // used in parsePEDecl(), pushTable(), parseTable(),
|
|
|
|
// buffer used during whitespace normalization
|
|
WCHAR* _pchBuffer;
|
|
long _lBufLen;
|
|
long _lBufSize;
|
|
bool _fFoundWhitespace;
|
|
bool _fUsingBuffer;
|
|
bool _fFoundNonWhitespace;
|
|
bool _fCheckAttribute; // need to check the attribute name
|
|
// xiaoyu : used for complicate attribute type, such as "xml:lang", "xmlns"
|
|
|
|
bool _fDTD; // xiaoyu whether xml contains DTD
|
|
//bool _fInternalSubset; // xiaoyu used in ParseDTD
|
|
//int _cConditionalSection;
|
|
//bool _fFoundPEREf;
|
|
//bool _fWasDTD;
|
|
// bool _fParsingNames;
|
|
|
|
bool _fParsingAttDef; // used in ParseAttrValue()
|
|
//int _cIgnoreSectLevel;
|
|
//bool _fResolved; // used in ParseEntity();
|
|
bool _fReturnAttributeValue;
|
|
//int _cStreams; // used to identify if PushStream was called.
|
|
// used in parseEntity();
|
|
WCHAR _wcEntityValue; // used in parseEntityRef()
|
|
XMLParser * _pXMLParser; // regular pointer pointing back to the parser
|
|
|
|
inline HRESULT PushChar(WCHAR ch)
|
|
{
|
|
if (_lBufLen < _lBufSize)
|
|
{
|
|
_pchBuffer[_lBufLen++] = ch; return S_OK;
|
|
}
|
|
else return _PushChar(ch);
|
|
}
|
|
HRESULT _PushChar(WCHAR ch); // grow the buffer.
|
|
};

#endif // _XMLSTREAM_HXX