windows-nt/Source/XPSP1/NT/base/win32/fusion/xmlparser/bufferedstream.cxx
2020-09-26 16:20:57 +08:00

566 lines
16 KiB
C++

/////////////////////////////////////////////////////////////////////////////////
//
// fusion\xmlparser\BufferedStream.cxx
//
/////////////////////////////////////////////////////////////////////////////////
#include "stdinc.h"
#include "core.hxx"
#pragma hdrstop
#include <memory.h>
//#include <shlwapip.h>
#include <ole2.h>
#include <xmlparser.h>
#include "bufferedstream.hxx"
#include "xmlstream.hxx"
#include "encodingstream.hxx"
#include "xmlhelper.hxx"
// Allocation granularity of the character buffer, in WCHARs:
// prepareForInput() allocates BLOCK_SIZE characters the first time and grows
// the buffer whenever fewer than BLOCK_SIZE free characters remain.
const long BLOCK_SIZE = 4096;
// no point remembering a line buffer longer than this because client
// probably can't deal with that anyway.
// (getNewStart() only keeps the beginning of the current line in the buffer
// when the marked/locked position is fewer than this many characters past it.)
const long MAX_LINE_BUFFER = 512;
// Constructs a stream with no character buffer and no input attached.
// pXMLStream is the stream whose ErrorCallback() is invoked by nextChar();
// the pointer is only stored here (the destructor in this file does not
// release it).
BufferedStream::BufferedStream(XMLStream *pXMLStream)
{
    _pchBuffer = NULL;
    _lSize = 0;
    // No buffer yet, hence no midpoint.  prepareForInput() sets the real
    // value when it allocates; giving it a defined value here keeps Freeze()
    // from comparing against an indeterminate member before the first read.
    _lMidPoint = 0;
    _pXMLStream = pXMLStream;
    init();
}
/////////////////////////////////////////////////////////////////////////////
void BufferedStream::init()
{
_lCurrent = _lUsed = _lMark = 0;
_lLine = 1; // lines start at 1.
_lMarkedline = 1;
_lLinepos = 0;
_lMarkedlinepos = 0;
_chLast = 0;
_lStartAt = 0;
_fEof = false;
_lLockedPos = -1;
_lLastWhiteSpace = -1;
_lLockCount = 0;
_fNotified = false;
_fFrozen = false;
_pPendingEncoding = NULL;
}
/////////////////////////////////////////////////////////////////////////////
// Releases everything the stream owns: the character buffer, the reference
// on the input stream and any encoding switch that was still pending.
BufferedStream::~BufferedStream()
{
    delete [] _pchBuffer;

    // Assigning NULL drops the reference held on the input stream.
    _pStmInput = NULL;

    if (_pPendingEncoding != NULL)
    {
        delete _pPendingEncoding;
        _pPendingEncoding = NULL;
    }
}
/////////////////////////////////////////////////////////////////////////////
// Returns the stream to its freshly-constructed state: frees the character
// buffer, drops the input stream and discards any pending encoding switch.
// Always returns S_OK.
HRESULT BufferedStream::Reset()
{
    // Free the pending encoding BEFORE init(): init() overwrites the pointer
    // with NULL without freeing it, so deleting afterwards would be a no-op
    // and the object would leak.
    delete _pPendingEncoding;
    _pPendingEncoding = NULL;

    // init() also clears the lock position/count and the frozen flag.
    init();

    delete[] _pchBuffer;
    _pchBuffer = NULL;
    _lSize = 0;
    _pStmInput = NULL;
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Attaches a new input stream.  A non-NULL stream resets the read state via
// init() before being stored; NULL simply detaches the current input.
// Always returns S_OK.
HRESULT
BufferedStream::Load(
    /* [unique][in] */ EncodingStream __RPC_FAR *pStm)
{
    if (pStm == NULL)
    {
        // Nothing to load: just let go of whatever input we had.
        _pStmInput = NULL;
        return S_OK;
    }

    init();
    _pStmInput = pStm;
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Pushes a block of raw bytes into the input stream, creating the
// EncodingStream on first use.
//   in, length  - the bytes to append
//   lastBuffer  - forwarded unchanged to EncodingStream::AppendData
// Returns E_OUTOFMEMORY if the stream cannot be created, otherwise S_OK or
// the result propagated through checkhr2 from EncodingStream::AppendData.
HRESULT
BufferedStream::AppendData( const BYTE* in, ULONG length, BOOL lastBuffer)
{
    HRESULT hr;

    // End of input was reached earlier: reset the read state before
    // accepting further data.
    if (_fEof)
    {
        init();
    }

    if (!_pStmInput)
    {
        EncodingStream* pNewStream = (EncodingStream*)EncodingStream::newEncodingStream(NULL);
        if (pNewStream == NULL)
        {
            return E_OUTOFMEMORY;
        }
        _pStmInput = pNewStream;
        pNewStream->Release(); // Smart pointer is holding a ref
    }

    checkhr2(_pStmInput->AppendData(in, length, lastBuffer));
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Reads the next character, refilling the buffer from the input stream when
// it is exhausted, and keeps line / whitespace bookkeeping up to date.
//   ch    - receives the character on success (not written at EOF or error)
//   fEOF  - set to true when the end of the input has been reached
// Returns S_OK on success or EOF, XML_E_BADCHARDATA for 0xFFFE / 0xFFFF, or
// whatever checkhr2 propagates from the XMLStream error callback or
// fillBuffer().
HRESULT
BufferedStream::nextChar(
    /* [out] */ WCHAR* ch,
    /* [out] */ bool* fEOF)
{
    HRESULT hr;
    if (_lCurrent >= _lUsed)
    {
        if (_fEof)
        {
            *fEOF = true;
            return S_OK;
        }
        if (! _fNotified && _lUsed > 0)
        {
            // Notify "data available" BEFORE blowing away the old data so
            // the parser can save it if need be.
            _fNotified = true;
            // NOTE: this code approximates what prepareForInput does
            // in order to accurately predict when the buffer is about to
            // be re-allocated.
            long shift = _fFrozen ? 0 : getNewStart(); // is data about to shift?
            long used = _lUsed - shift; // this is how much is really used after shift
            if (_lSize - used < BLOCK_SIZE + 1) // +1 for null termination.
            {
                // we will reallocate !! So return a special
                // return code
                hr = E_DATA_REALLOCATE;
            }
            else
            {
                hr = E_DATA_AVAILABLE;
            }
            checkhr2( _pXMLStream->ErrorCallback(hr) );
        }
        checkhr2( fillBuffer() );
        if (_fEof)
        {
            *fEOF = true;
            return S_OK;
        }
        _fNotified = false;
    }

    WCHAR result = _pchBuffer[_lCurrent++];

    // Remember the character that IMMEDIATELY preceded this one.  Previously
    // _chLast was only updated on line breaks, so an LF that followed a
    // CR-terminated line with ordinary text in between was mistaken for the
    // second half of a CRLF pair and not counted as a new line.
    WCHAR previous = _chLast;
    _chLast = result;

    switch (result)
    {
    case 0xa:
    case 0xd:
        // CR always starts a new line; LF does too unless it directly
        // follows a CR (CRLF counts as one line break).
        if (result == 0xd || previous != 0xd)
            _lLine++;
        _lLinepos = _lCurrent;
        _lLastWhiteSpace = _lCurrent;
        break;
    case 0x20:
    case 0x9:
        _lLastWhiteSpace = _lCurrent;
        break;
    case 0xfffe:
    case 0xffff:
    //case 0xfeff:
        ::FusionpDbgPrintEx(
            FUSION_DBG_LEVEL_ERROR,
            "SXS.DLL: XML Parser found either 0xfffe or 0xffff\n");
        return XML_E_BADCHARDATA;
    }
    *ch = result;
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Fast scan over character data already in the buffer, stopping at the first
// delimiter: ']', '>', '<', '&', '\'' or '"'.
//   ch          - in: the character the caller last read (only inspected for
//                 whitespace-ness); out: the delimiter that stopped the scan
//   fWhitespace - set to false if any non-whitespace character was seen
//                 (including the incoming *ch); never set to true here
// Returns S_OK when a delimiter was found, E_PENDING when the buffered data
// ran out first (this function never refills the buffer), or
// XML_E_BADCHARDATA for 0xFFFE / 0xFFFF.
HRESULT BufferedStream::scanPCData(
    /* [out] */ WCHAR* ch,
    /* [out] */ bool* fWhitespace)
{
    WCHAR result;
    bool foundNonWhiteSpace = false;
    if (! isWhiteSpace(*ch))
        foundNonWhiteSpace = true;

    // Then skip the data until we find one of the delimiters.
    while (_lCurrent < _lUsed)
    {
        result = _pchBuffer[_lCurrent++];

        // Track the immediately preceding character so that only a true
        // CR,LF pair is collapsed into one line break.  Previously _chLast
        // was only updated on line breaks, so an LF following a
        // CR-terminated line with text in between was not counted.
        WCHAR previous = _chLast;
        _chLast = result;

        switch (result)
        {
        case ']': // xiaoyu : the specified chars can be changed for our own purpose
        case '>':
        case '<':
        case '&':
        case '\'': // so this can be used to scan attribute values also.
        case '"':  // so this can be used to scan attribute values also.
            *ch = result;
            if (foundNonWhiteSpace)
                *fWhitespace = false;
            return S_OK;
        case 0xa:
        case 0xd:
            if (result == 0xd || previous != 0xd)
                _lLine++;
            _lLinepos = _lCurrent;
            _lLastWhiteSpace = _lCurrent;
            break;
        case 0x20:
        case 0x9:
            _lLastWhiteSpace = _lCurrent;
            break;
        case 0xfffe:
        case 0xffff:
            ::FusionpDbgPrintEx(
                FUSION_DBG_LEVEL_ERROR,
                "SXS.DLL: XML Parser found either 0xfffe or 0xffff\n");
            return XML_E_BADCHARDATA;
        default:
            foundNonWhiteSpace = true;
            break;
        }
    }

    // And just return E_PENDING if we run out of buffer.
    if (foundNonWhiteSpace)
        *fWhitespace = false;
    return E_PENDING;
}
/////////////////////////////////////////////////////////////////////////////
long BufferedStream::getLine()
{
return _lMarkedline;
}
/////////////////////////////////////////////////////////////////////////////
// Column of the marked token within its line.
//
// _lMarkedlinepos is the buffer offset of the start of the marked line and
// _lMark is the buffer offset of the marked token, so their difference is
// the token's offset within the line.  The result is 1-based (start of line
// is column 1), matching the 1-based line numbers.  If the recorded line
// start lies beyond the mark, 0 is returned.
long BufferedStream::getLinePos()
{
    const long oneBasedMark = _lMark + 1;
    if (_lMarkedlinepos > oneBasedMark)
    {
        return 0;
    }
    return oneBasedMark - _lMarkedlinepos;
}
/////////////////////////////////////////////////////////////////////////////
// Position of the marked token counted from the start of the input:
// _lStartAt is the amount of data already shifted out of the buffer and
// _lMark is the mark's offset inside the buffer.
long BufferedStream::getInputPos()
{
    const long absoluteMark = _lStartAt + _lMark;
    return absoluteMark;
}
/////////////////////////////////////////////////////////////////////////////
// Returns a pointer to the start of the marked line inside the internal
// buffer (not a copy, and not terminated at the end of the line).
//   len      - receives the number of characters up to, but not including,
//              the first CR, LF or terminating null
//   startpos - receives the 1-based position of the marked token relative to
//              the returned pointer (0 if the line start lies beyond the mark)
// Returns NULL, with *len and *startpos both 0, when no buffer exists yet.
WCHAR* BufferedStream::getLineBuf(ULONG* len, ULONG* startpos)
{
    *len = 0;
    // Previously left unset on the early return below, handing the caller
    // an indeterminate value.
    *startpos = 0;
    if (_pchBuffer == NULL)
        return NULL;

    WCHAR* result = &_pchBuffer[_lMarkedlinepos];

    // internal _pchBuffer is guaranteed to be null terminated.
    ULONG i = 0;
    for (WCHAR ch = result[0]; ch != 0 && ch != L'\n' && ch != L'\r'; ch = result[i])
    {
        i++;
    }
    *len = i;

    // also return the line position relative to start of
    // returned buffer.
    *startpos = (_lMarkedlinepos > _lMark+1) ? 0 : _lMark+1-_lMarkedlinepos;
    return result;
}
/////////////////////////////////////////////////////////////////////////////
// Requests that the input be decoded with a different character encoding.
//   charset, len - name of the encoding and its length, passed to
//                  Encoding::newEncoding
// If the buffer is frozen the switch is only recorded and is carried out by
// UnFreeze(); otherwise it happens immediately.
// Returns E_FAIL when no input stream is attached, E_OUTOFMEMORY when the
// Encoding object cannot be created, otherwise S_OK or the result of
// doSwitchEncoding().
HRESULT BufferedStream::switchEncoding(const WCHAR * charset, ULONG len)
{
    if (!_pStmInput)
    {
        return E_FAIL;
    }

    // A previous request may still be pending (e.g. two calls while frozen).
    // Free it before replacing it, otherwise the earlier object is leaked.
    delete _pPendingEncoding;
    _pPendingEncoding = Encoding::newEncoding(charset, len);
    if (_pPendingEncoding == NULL)
    {
        return E_OUTOFMEMORY;
    }

    if (_fFrozen)
    {
        // Deferred: UnFreeze() performs the switch.
        return S_OK;
    }
    return doSwitchEncoding();
}
/////////////////////////////////////////////////////////////////////////////
// Carries out the pending encoding switch at the current read position.
// The pending Encoding object is handed to the input stream and
// _pPendingEncoding is cleared.
// Returns S_OK on success, XML_E_INVALIDENCODING if the stream reports
// E_INVALIDARG, XML_E_INVALIDSWITCH for any other failure.
HRESULT BufferedStream::doSwitchEncoding()
{
    Encoding* encoding = _pPendingEncoding;
    _pPendingEncoding = NULL;

    // The stream is told the position relative to the start of the input.
    HRESULT hr = _pStmInput->switchEncodingAt(encoding, _lStartAt + _lCurrent);
    if (hr == S_FALSE)
    {
        // need to re-read to force re-decode into new encoding.
        // In other words we have to forget that we read past this
        // position already so that the next call to nextChar
        // will call fillBuffer again.
        //
        // _lUsed is an index into _pchBuffer, so it must be truncated to the
        // buffer-relative _lCurrent.  The previous code stored the absolute
        // position (_lStartAt + _lCurrent), which is only the same value
        // while nothing has been shifted out of the buffer; after a shift it
        // left _lUsed pointing past the valid data.
        _lUsed = _lCurrent;
        hr = S_OK;
    }
    else if (FAILED(hr))
    {
        hr = (hr == E_INVALIDARG) ? XML_E_INVALIDENCODING : XML_E_INVALIDSWITCH;
    }
    return hr;
}
/////////////////////////////////////////////////////////////////////////////
// Returns a pointer to a contiguous block of text accumulated
// from the last time Mark() was called up to but not including
// the last character read. (This allows a parser to have a
// lookahead character that is not included in the token).
//   p   - receives a pointer into the internal buffer at the mark
//   len - receives getTokenLength()
// Returns E_FAIL if no buffer has been allocated yet, otherwise S_OK.
HRESULT
BufferedStream::getToken(const WCHAR**p, long* len)
{
    if (_pchBuffer == NULL)
        return E_FAIL;

    // An empty "if (_lCurrent != _lCurrent2)" block used to sit here as a
    // placeholder for whitespace-compression fix-up; it did nothing and has
    // been removed.
    *p = &_pchBuffer[_lMark];
    *len = getTokenLength();
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Remembers the current mark (position, line and line start) so UnLock()
// can restore it.  Locks nest: only the outermost Lock() records anything,
// inner ones just bump the count.
void
BufferedStream::Lock()
{
    _lLockCount++;
    if (_lLockCount != 1)
    {
        // Nested lock - the outer lock wins.
        return;
    }

    _lLockedPos = _lMark;
    _lLockedLine = _lMarkedline;
    _lLockedLinePos = _lMarkedlinepos;
}
/////////////////////////////////////////////////////////////////////////////
// Undoes one Lock().  When the outermost lock is released the mark is put
// back to where it was at Lock() time and the locked position is cleared.
void
BufferedStream::UnLock()
{
    _lLockCount--;
    if (_lLockCount != 0)
    {
        return;
    }

    _lMark = _lLockedPos;
    _lMarkedline = _lLockedLine;
    _lMarkedlinepos = _lLockedLinePos;
    _lLockedPos = -1;
}
/////////////////////////////////////////////////////////////////////////////
// Marks the buffer as frozen so that prepareForInput() will no longer shift
// its contents.  Returns S_OK, or whatever checkhr2 propagates from
// prepareForInput().
HRESULT
BufferedStream::Freeze()
{
    HRESULT hr;

    // Since we freeze the buffer a lot now (any time we're inside
    // a tag) we need to shift the bytes down in the buffer more
    // frequently in order to guarantee we have space in the buffer
    // when we need it.  Otherwise the buffer would tend to just
    // keep growing and growing.  So we shift the buffer when we
    // go past the midpoint.
    const bool pastMidPoint = (_lCurrent > _lMidPoint);
    if (pastMidPoint)
    {
        checkhr2( prepareForInput() );
    }

    _fFrozen = true;
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Clears the frozen flag and, if switchEncoding() deferred an encoding
// change while frozen, performs it now.  Returns S_OK or the result of
// doSwitchEncoding().
HRESULT
BufferedStream::UnFreeze()
{
    _fFrozen = false;

    if (_pPendingEncoding == NULL)
    {
        return S_OK;
    }
    return doSwitchEncoding();
}
/////////////////////////////////////////////////////////////////////////////
// Makes room in the buffer and reads more characters from the input stream.
// Returns E_PENDING when no input stream has been attached yet, a failure
// code from the stream's Read(), whatever checkhr2 propagates from
// prepareForInput(), or S_OK.  A successful read of zero bytes sets _fEof.
HRESULT
BufferedStream::fillBuffer()
{
    HRESULT hr;
    checkhr2( prepareForInput() );

    if (!_pStmInput)
    {
        // SetInput or AppendData hasn't been called yet.
        return E_PENDING;
    }

    // Free characters after the used region; one slot is held back for the
    // NULL terminator.
    long cchFree = _lSize - _lUsed - 1;

    // get more bytes.
    ULONG cbRead = 0;
    HRESULT rc = _pStmInput->Read(&_pchBuffer[_lUsed], cchFree*sizeof(WCHAR), &cbRead);

    // Account for and terminate whatever arrived, even if Read() failed.
    _lUsed += cbRead/sizeof(WCHAR); // stream must return unicode characters.
    _pchBuffer[_lUsed] = 0;         // NULL terminate the _pchBuffer.

    if (FAILED(rc))
    {
        return rc;
    }

    if (cbRead == 0)
    {
        _fEof = true;
        // increment _lCurrent, so that getToken returns
        // last character in file.
        _lCurrent++;
        _lCurrent2++;
    }
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Makes sure there is room for at least BLOCK_SIZE more characters (plus the
// NULL terminator) after the used part of the buffer.
//
// Step 1 (skipped while frozen): discard data in front of the position
// returned by getNewStart() by sliding the rest of the buffer down, and
// rebase every buffer-relative offset accordingly.
// Step 2: if there still is not enough room, allocate a larger buffer
// (double the size, falling back to +BLOCK_SIZE) and copy the data over.
//
// Returns E_OUTOFMEMORY only when allocation fails AND there is no free
// space at all; otherwise S_OK (possibly with less than BLOCK_SIZE free).
HRESULT
BufferedStream::prepareForInput()
{
    // move the currently used section of the _pchBuffer
    // (buf[mark] to buf[used]) down to the beginning of
    // the _pchBuffer.
    long newstart = 0;

    // BUGBUG - if this code is changed BufferedStream::nextChar has to
    // be updated also so that they stay in sync, otherwise we might
    // re-allocated the buffer without generating an E_DATA_REALLOCATE
    // notification - which would be very bad (causes GPF's in the parser).
    if (! _fFrozen) // can't shift bits if the buffer is frozen.
    {
        newstart = getNewStart();
        if (newstart > 0)
        {
            WCHAR* src = &_pchBuffer[newstart];
            _lUsed -= newstart;
            _lStartAt += newstart;
            ::memmove(_pchBuffer,src,_lUsed*sizeof(WCHAR));
            _lCurrent -= newstart;
            _lCurrent2 -= newstart;
            _lLastWhiteSpace -= newstart;
            // Line-start offsets that fall in the discarded region are
            // clamped to 0 rather than allowed to go negative.
            _lLinepos = (_lLinepos > newstart) ? _lLinepos-newstart : 0;
            // Test _lMarkedlinepos itself.  The previous code tested
            // _lLinepos (already rebased on the line above), which could
            // zero a valid marked-line offset or drive it negative.
            _lMarkedlinepos = (_lMarkedlinepos > newstart) ? _lMarkedlinepos-newstart : 0;
            _lMark -= newstart;
            _lLockedLinePos = (_lLockedLinePos > newstart) ? _lLockedLinePos-newstart : 0;
            _lLockedPos -= newstart;
        }
    }

    // make sure we have a reasonable amount of space
    // left in the _pchBuffer.
    long space = _lSize - _lUsed;
    if (space > 0) space--; // reserve 1 for NULL termination
    if (_pchBuffer == NULL || space < BLOCK_SIZE)
    {
        // double the size of the buffer.
        long newsize = (_lSize == 0) ? BLOCK_SIZE : (_lSize*2);
        WCHAR* newbuf = NEW (WCHAR[newsize]);
        if (newbuf == NULL)
        {
            // try more conservative allocation.
            newsize = _lSize + BLOCK_SIZE;
            newbuf = NEW (WCHAR[newsize]);
        }
        // Only fatal when there is no room at all; otherwise carry on with
        // whatever space the existing buffer still has.
        if (newbuf == NULL && space == 0)
            return E_OUTOFMEMORY;
        if (newbuf != NULL)
        {
            if (_pchBuffer != NULL)
            {
                // copy old bytes to new _pchBuffer.
                ::memcpy(newbuf,_pchBuffer,_lUsed*sizeof(WCHAR));
                delete [] _pchBuffer;
            }
            newbuf[_lUsed] = 0; // make sure it's null terminated.
            _pchBuffer = newbuf;
            _lSize = newsize;
            _lMidPoint = newsize / 2;
        }
    }
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
long
BufferedStream::getNewStart()
{
long newstart = 0;
// Unless the buffer is frozen, in which case we just reallocate and
// do no shifting of data.
if (_lLockedPos > 0)
{
// and try and preserve the beginning of the marked line if we can
if (_lLockedLinePos < _lLockedPos &&
_lLockedPos - _lLockedLinePos < MAX_LINE_BUFFER)
{
newstart = _lLockedLinePos;
}
}
else if (_lMark > 0)
{
// and try and preserve the beginning of the marked line if we can
newstart = _lMark;
if (_lMarkedlinepos < _lMark &&
_lMark - _lMarkedlinepos < MAX_LINE_BUFFER) // watch out for long lines
{
newstart = _lMarkedlinepos;
}
}
return newstart;
}