///////////////////////////////////////////////////////////////////////////////// // // fusion\xmlparser\BufferedStream.hxx // ///////////////////////////////////////////////////////////////////////////////// #ifndef _FUSION_XMLPARSER__BUFFEREDSTREAM_H_INCLUDE_ #define _FUSION_XMLPARSER__BUFFEREDSTREAM_H_INCLUDE_ #pragma once #include "encodingstream.hxx" // Returned from nextChar when a new buffer is read. This gives the // caller some idea of download progress without having to count // characters. Just call nextChar again to continue on as normal. #define E_DATA_AVAILABLE 0xC00CE600L #define E_DATA_REALLOCATE 0xC00CE601L //------------------------------------------------------------------------ // This class adds buffering and auto-growing semantics to an IStream // so that a variable length chunk of an IStream can be collected // in memory for processing using Mark() and getToken() methods. // It also supports collapsing of newlines into 0x20 if you use // nextChar2 instead of nextChar. // It also guarentees a line buffer so that a pointer to the // beginning of the line can be returned in error conditions. // (for the degenerate case where there are no new lines, it // returns the last 100 characters). // // Alternatively, buffers can be appended instead of // using an IStream. In this case the BufferedStream returns // E_PENDING until the last buffer is appended. Use AppendData instead // of Load(IStream. class XMLStream; class BufferedStream { public: BufferedStream(XMLStream *pXMLStream); ~BufferedStream(); // Method 1: pass in an IStream. The IStream must return unicode // characters. HRESULT Load( /* [unique][in] */ EncodingStream *pStm); // Method 2: append raw buffers, set lastBuffer to TRUE you are ready to // return E_ENDOFINPUT. Length is number of chars in buffer. To do unicode // you must provide a byte order mark (0xFFFE or OxFEFF depending // on whether it is bigendian or little endian). HRESULT AppendData(const BYTE* buffer, ULONG length, BOOL lastBuffer); HRESULT Reset(); // Get next char from buffer , if EOF, set fEOF to be true HRESULT nextChar( /* [out] */ WCHAR* ch, /* [out] */ bool* fEOF); // Marks the last character read as the start of a buffer // that grows until Mark is called again. You can mark backwards // from last character read anywhere up to last marked position // by passing a non-zero delta. For example, to mark the // position at the 3rd last character read, call Mark(3); // xiaoyu : _lCurrent always points to the char to read next inline void Mark(long back = 0) { _lMark = (_lCurrent > back) ? (_lCurrent - back - 1) : 0; if (_lLinepos != _lCurrent) { // only move the marked line position forward, if we haven't // marked the actual new line characters. This ensures we // return useful information from getLineBuf. _lMarkedline = _lLine; _lMarkedlinepos = _lLinepos; } } // Returns a pointer to a contiguous block of text accumulated // from the last time Mark() was called up to but not including // the last character read. (This allows a parser to have a // lookahead character that is not included in the token). HRESULT getToken(const WCHAR**p, long* len); HRESULT switchEncoding(const WCHAR * charset, ULONG len); // Returns Marked position. long getLine(); long getLinePos(); WCHAR* getLineBuf(ULONG* len, ULONG* startpos); long getInputPos(); // absolute position. long getTokenLength() // convenience function. { return (_lCurrent - 1 - _lMark); } inline bool isWhiteSpace(WCHAR ch) // no matter what value of "ch" { UNUSED(ch); return (_lLastWhiteSpace == _lCurrent); } inline void setWhiteSpace(bool flag = true) { _lLastWhiteSpace = flag ? _lCurrent : _lCurrent-1; } void init(); // Lock/UnLock is another level on top of Mark/Reset that // works as follows. If you Lock(), then the buffer keeps everything // until you UnLock at which time it resets the "Marked" position to // the Locked() position. This is so that you can scan through // a series of tokens, but then return all of them in one chunk. void Lock(); void UnLock(); // Freezing the buffer makes the buffer always grow WITHOUT shifting // data around in the buffer. This makes it valid to hold on to pointers // in the buffer until the buffer is unfrozen. HRESULT Freeze(); HRESULT UnFreeze(); #ifdef FUSION_USE_OLD_XML_PARSER_SOURCE WCHAR* getEncoding(); #endif // Special XML optimization. HRESULT scanPCData( /* [out] */ WCHAR* ch, /* [out] */ bool* fWhitespace); private: WCHAR nextChar(); HRESULT fillBuffer(); HRESULT prepareForInput(); HRESULT doSwitchEncoding(); long getNewStart(); REncodingStream _pStmInput; // input stream WCHAR* _pchBuffer; // buffer containing chars from input stream. long _lCurrent; // current read position in buffer long _lCurrent2; // used when collapsing white space. long _lSize; // total size of buffer. long _lMark; // start of current token. long _lUsed; // amount of buffer currently used. WCHAR _chLast; // last character returned. long _lLine; // current line number long _lLinepos; // position of start of last line. long _lMarkedline; // current line number of marked position. long _lMarkedlinepos; long _lStartAt; // The number of unicode characters before the current buffer bool _fEof; bool _fNotified; bool _fFrozen; long _lLockCount; long _lLockedPos; long _lLockedLine; long _lLockedLinePos; long _lLastWhiteSpace; long _lMidPoint; Encoding* _pPendingEncoding; XMLStream *_pXMLStream; // regular pointer pointing back to the XMLStream object }; #endif // _BUFFEREDSTREAM_HXX