windows-nt/Source/XPSP1/NT/base/win32/fusion/xmlparser/bufferedstream.hxx

168 lines
5.9 KiB
C++
Raw Normal View History

2020-09-26 03:20:57 -05:00
/////////////////////////////////////////////////////////////////////////////////
//
// fusion\xmlparser\BufferedStream.hxx
//
/////////////////////////////////////////////////////////////////////////////////
#ifndef _FUSION_XMLPARSER__BUFFEREDSTREAM_H_INCLUDE_
#define _FUSION_XMLPARSER__BUFFEREDSTREAM_H_INCLUDE_
#pragma once
#include "encodingstream.hxx"
// Returned from nextChar when a new buffer is read. This gives the
// caller some idea of download progress without having to count
// characters. Just call nextChar again to continue on as normal.
#define E_DATA_AVAILABLE 0xC00CE600L
// NOTE(review): this header does not describe E_DATA_REALLOCATE. Presumably
// it reports that the internal buffer was reallocated -- confirm against
// the BufferedStream implementation before relying on that.
#define E_DATA_REALLOCATE 0xC00CE601L
//------------------------------------------------------------------------
// This class adds buffering and auto-growing semantics to an IStream
// so that a variable length chunk of an IStream can be collected
// in memory for processing using Mark() and getToken() methods.
// It also supports collapsing of newlines into 0x20 if you use
// nextChar2 instead of nextChar.
// It also guarantees a line buffer so that a pointer to the
// beginning of the line can be returned in error conditions.
// (for the degenerate case where there are no new lines, it
// returns the last 100 characters).
//
// Alternatively, buffers can be appended instead of
// using an IStream. In this case the BufferedStream returns
// E_PENDING until the last buffer is appended. Use AppendData instead
// of Load(IStream).
class XMLStream;
class BufferedStream
{
public:
    BufferedStream(XMLStream *pXMLStream);
    ~BufferedStream();

    // Input method 1: hand over a stream. The stream must return unicode
    // characters.
    HRESULT Load(
        /* [unique][in] */ EncodingStream *pStm);

    // Input method 2: append raw buffers. Pass lastBuffer as TRUE once you
    // are ready for E_ENDOFINPUT to be returned. Length is the number of
    // chars in buffer. For unicode input you must provide a byte order mark
    // (0xFFFE or 0xFEFF, depending on whether the data is big endian or
    // little endian).
    HRESULT AppendData(const BYTE* buffer, ULONG length, BOOL lastBuffer);

    HRESULT Reset();

    // Fetches the next char from the buffer; on EOF, fEOF is set to true.
    HRESULT nextChar(
        /* [out] */ WCHAR* ch,
        /* [out] */ bool* fEOF);

    // Makes the last character read the start of a buffer that keeps
    // growing until Mark is called again. A non-zero "back" moves the mark
    // backwards from the last character read, anywhere up to the previously
    // marked position. For example, Mark(3) marks the position at the 3rd
    // last character read.
    // xiaoyu : _lCurrent always points to the char to read next
    inline void Mark(long back = 0)
    {
        if (_lCurrent > back)
        {
            _lMark = _lCurrent - back - 1;
        }
        else
        {
            // Fewer characters were read than we were asked to step back
            // over: clamp the mark to the start of the buffer.
            _lMark = 0;
        }

        if (_lLinepos == _lCurrent)
        {
            // The actual new line characters are being marked. Leave the
            // marked line position where it is so that getLineBuf still
            // returns useful information.
            return;
        }

        _lMarkedline = _lLine;
        _lMarkedlinepos = _lLinepos;
    }

    // Hands back a pointer to the contiguous block of text accumulated
    // since the last Mark() call, up to but NOT including the last
    // character read. (This lets a parser keep one lookahead character
    // that is not part of the token.)
    HRESULT getToken(const WCHAR**p, long* len);

    HRESULT switchEncoding(const WCHAR * charset, ULONG len);

    // Returns Marked position.
    long getLine();
    long getLinePos();
    WCHAR* getLineBuf(ULONG* len, ULONG* startpos);
    long getInputPos(); // absolute position.

    // Convenience function: number of characters between the mark and the
    // last character read (the last character read itself is excluded).
    long getTokenLength()
    {
        const long lLastRead = _lCurrent - 1;
        return lLastRead - _lMark;
    }

    // The answer does not depend on the value of "ch": it only compares the
    // recorded white space position with the current read position.
    inline bool isWhiteSpace(WCHAR ch)
    {
        UNUSED(ch);
        const bool fAtWhiteSpace = (_lCurrent == _lLastWhiteSpace);
        return fAtWhiteSpace;
    }

    // flag == true records _lCurrent as the white space position (so
    // isWhiteSpace() answers true while both values stay unchanged);
    // flag == false records _lCurrent-1 instead.
    inline void setWhiteSpace(bool flag = true)
    {
        if (flag)
        {
            _lLastWhiteSpace = _lCurrent;
        }
        else
        {
            _lLastWhiteSpace = _lCurrent - 1;
        }
    }

    void init();

    // Lock/UnLock is one more level on top of Mark/Reset. After Lock() the
    // buffer keeps everything until UnLock() is called, at which point the
    // "Marked" position is reset to the Locked() position. This lets you
    // scan through a series of tokens and then return all of them in one
    // chunk.
    void Lock();
    void UnLock();

    // While frozen, the buffer always grows WITHOUT shifting data around
    // inside it, so pointers into the buffer stay valid until the buffer
    // is unfrozen.
    HRESULT Freeze();
    HRESULT UnFreeze();

#ifdef FUSION_USE_OLD_XML_PARSER_SOURCE
    WCHAR* getEncoding();
#endif

    // Special XML optimization.
    HRESULT scanPCData(
        /* [out] */ WCHAR* ch,
        /* [out] */ bool* fWhitespace);

private:
    WCHAR nextChar();
    HRESULT fillBuffer();
    HRESULT prepareForInput();
    HRESULT doSwitchEncoding();
    long getNewStart();

    REncodingStream _pStmInput;     // input stream
    WCHAR*          _pchBuffer;     // buffer containing chars from input stream.
    long            _lCurrent;      // current read position in buffer
    long            _lCurrent2;     // used when collapsing white space.
    long            _lSize;         // total size of buffer.
    long            _lMark;         // start of current token.
    long            _lUsed;         // amount of buffer currently used.
    WCHAR           _chLast;        // last character returned.
    long            _lLine;         // current line number
    long            _lLinepos;      // position of start of last line.
    long            _lMarkedline;   // current line number of marked position.
    long            _lMarkedlinepos;
    long            _lStartAt;      // The number of unicode characters before the current buffer
    bool            _fEof;
    bool            _fNotified;
    bool            _fFrozen;
    long            _lLockCount;
    long            _lLockedPos;
    long            _lLockedLine;
    long            _lLockedLinePos;
    long            _lLastWhiteSpace;
    long            _lMidPoint;
    Encoding*       _pPendingEncoding;
    XMLStream      *_pXMLStream;    // regular pointer pointing back to the XMLStream object
};
#endif // _FUSION_XMLPARSER__BUFFEREDSTREAM_H_INCLUDE_