566 lines
16 KiB
C++
566 lines
16 KiB
C++
|
/////////////////////////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
// fusion\xmlparser\BufferedStream.cxx
|
||
|
//
|
||
|
/////////////////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
#include "stdinc.h"
|
||
|
#include "core.hxx"
|
||
|
#pragma hdrstop
|
||
|
|
||
|
#include <memory.h>
|
||
|
//#include <shlwapip.h>
|
||
|
#include <ole2.h>
|
||
|
#include <xmlparser.h>
|
||
|
|
||
|
#include "bufferedstream.hxx"
|
||
|
#include "xmlstream.hxx"
|
||
|
#include "encodingstream.hxx"
|
||
|
#include "xmlhelper.hxx"
|
||
|
|
||
|
// Granularity, in WCHARs, of the character buffer: prepareForInput()
// allocates this much initially and grows the buffer whenever fewer than
// this many free slots remain.
const long BLOCK_SIZE = 4096;

// Upper bound on how far back from the marked position the start of the
// current line is kept in the buffer.  A client probably cannot make use
// of a longer line buffer anyway.
const long MAX_LINE_BUFFER = 512;
|
||
|
|
||
|
// Constructs a stream that has no character buffer yet.
//
//  pXMLStream - stored in _pXMLStream; nextChar() later calls its
//               ErrorCallback() to announce E_DATA_AVAILABLE /
//               E_DATA_REALLOCATE.
BufferedStream::BufferedStream(XMLStream *pXMLStream)
{
    // Remember who to notify.
    _pXMLStream = pXMLStream;

    // No storage yet; prepareForInput() allocates the buffer on demand.
    _lSize = 0;
    _pchBuffer = NULL;

    // Everything else (cursors, line tracking, lock/freeze state) starts
    // from the common defaults.
    init();
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
void BufferedStream::init()
|
||
|
{
|
||
|
_lCurrent = _lUsed = _lMark = 0;
|
||
|
_lLine = 1; // lines start at 1.
|
||
|
_lMarkedline = 1;
|
||
|
_lLinepos = 0;
|
||
|
_lMarkedlinepos = 0;
|
||
|
_chLast = 0;
|
||
|
_lStartAt = 0;
|
||
|
_fEof = false;
|
||
|
_lLockedPos = -1;
|
||
|
_lLastWhiteSpace = -1;
|
||
|
_lLockCount = 0;
|
||
|
_fNotified = false;
|
||
|
_fFrozen = false;
|
||
|
_pPendingEncoding = NULL;
|
||
|
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Frees the character buffer, drops the reference held on the input stream
// and destroys any encoding switch that was still pending.
BufferedStream::~BufferedStream()
{
    // Character storage.
    delete [] _pchBuffer;

    // Assigning NULL makes the stream holder let go of its stream.
    _pStmInput = NULL;

    // An encoding requested while frozen may never have been applied.
    delete _pPendingEncoding;
    _pPendingEncoding = NULL;
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Returns the stream to its freshly constructed state: frees the character
// buffer, drops the input stream, discards any pending encoding switch and
// re-initialises all cursor / line / lock / freeze state.
//
// Always returns S_OK.
HRESULT BufferedStream::Reset()
{
    // Destroy the pending encoding BEFORE init(): init() overwrites the
    // pointer with NULL without freeing it, so deleting afterwards (as this
    // function used to) leaked the object.
    delete _pPendingEncoding;
    _pPendingEncoding = NULL;

    // Release the character storage.
    delete[] _pchBuffer;
    _pchBuffer = NULL;
    _lSize = 0;

    // Drop our reference on the input stream.
    _pStmInput = NULL;

    // Resets cursors, line tracking, lock state (_lLockedPos, _lLockCount)
    // and _fFrozen, so none of them needs to be repeated here.
    init();

    return S_OK;
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Attaches a new input stream.
//
//  pStm - stream to read decoded characters from.  When NULL the current
//         stream is simply dropped and no other state is touched.
//
// A non-NULL stream re-initialises the parsing state through init() before
// it is stored; an already allocated character buffer is kept for reuse.
// Always returns S_OK.
HRESULT
BufferedStream::Load(
        /* [unique][in] */ EncodingStream __RPC_FAR *pStm)
{
    if (pStm == NULL)
    {
        _pStmInput = NULL;
        return S_OK;
    }

    init();
    _pStmInput = pStm;
    return S_OK;
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Pushes a block of raw input bytes into the underlying encoding stream,
// creating that stream first if none has been attached yet.
//
//  in         - bytes to append.
//  length     - number of bytes at 'in'.
//  lastBuffer - forwarded unchanged to EncodingStream::AppendData().
//
// Returns S_OK, E_OUTOFMEMORY if the encoding stream cannot be created, or
// whatever checkhr2 propagates from EncodingStream::AppendData().
HRESULT
BufferedStream::AppendData( const BYTE* in, ULONG length, BOOL lastBuffer)
{
    HRESULT hr;     // written by the checkhr2 macro below

    // A previous input ran to EOF: start over with clean parsing state.
    if (_fEof)
        init();

    if (!_pStmInput)
    {
        EncodingStream* pNewStream = (EncodingStream*)EncodingStream::newEncodingStream(NULL);
        if (pNewStream == NULL)
        {
            return E_OUTOFMEMORY;
        }

        _pStmInput = pNewStream;
        pNewStream->Release();  // _pStmInput (a smart pointer) now holds the reference
    }

    checkhr2(_pStmInput->AppendData(in, length, lastBuffer));
    return S_OK;
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Reads the next character, refilling the buffer from the input stream
// when all buffered data has been consumed.
//
//  ch   - receives the character read.  It is NOT written when *fEOF is
//         set or when a failure code is returned.
//  fEOF - set to true once the end of input is reached.  It is never set
//         to false here, so the caller must initialise it.
//
// Returns S_OK on success (including EOF), XML_E_BADCHARDATA when the
// character is 0xfffe or 0xffff, or whatever checkhr2 propagates from
// XMLStream::ErrorCallback() or fillBuffer().
//
// Line counting: CR, LF and CR immediately followed by LF each count as one
// line break.  _chLast is therefore updated for EVERY character; it used to
// be updated only for CR/LF, so an LF following a CR with other characters
// in between was silently not counted.
HRESULT
BufferedStream::nextChar(
        /* [out] */ WCHAR* ch,
        /* [out] */ bool* fEOF)
{
    HRESULT hr;     // written by the checkhr2 macro

    if (_lCurrent >= _lUsed)
    {
        if (_fEof)
        {
            *fEOF = true;
            return S_OK;
        }

        if (! _fNotified && _lUsed > 0)
        {
            // Tell the parser BEFORE the old data is blown away, so it can
            // save anything it still needs.  Notify only once per refill.
            _fNotified = true;

            // NOTE: this approximates what prepareForInput() does in order
            // to predict accurately whether the buffer is about to be
            // re-allocated.  The two must be kept in sync.
            const long shift = _fFrozen ? 0 : getNewStart();   // is data about to shift?
            const long used = _lUsed - shift;                  // really used after the shift

            HRESULT hrNotify;
            if (_lSize - used < BLOCK_SIZE + 1)     // +1 for null termination.
                hrNotify = E_DATA_REALLOCATE;       // the buffer will be re-allocated
            else
                hrNotify = E_DATA_AVAILABLE;

            checkhr2( _pXMLStream->ErrorCallback(hrNotify) );
        }

        checkhr2( fillBuffer() );
        if (_fEof)
        {
            *fEOF = true;
            return S_OK;
        }
        _fNotified = false;
    }

    WCHAR result = _pchBuffer[_lCurrent++];

    // Remember whether the character immediately before this one was a CR,
    // then record this character as the most recent one.
    const bool fAfterCR = (_chLast == 0xd);
    _chLast = result;

    switch (result)
    {
    case 0xa:
    case 0xd:
        // An LF directly after a CR belongs to the same line break.
        if (result == 0xd || !fAfterCR)
            _lLine++;
        _lLinepos = _lCurrent;
        _lLastWhiteSpace = _lCurrent;
        break;

    case 0x20:
    case 0x9:
        _lLastWhiteSpace = _lCurrent;
        break;

    case 0xfffe:
    case 0xffff:
    //case 0xfeff:
        ::FusionpDbgPrintEx(
            FUSION_DBG_LEVEL_ERROR,
            "SXS.DLL: XML Parser found either 0xfffe or 0xffff\n");

        return XML_E_BADCHARDATA;
    }

    *ch = result;
    return S_OK;
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Fast scan over character data that is already in the buffer.  Consumes
// characters until one of the delimiters  ] > < & ' "  has been read.
// This function never refills the buffer.
//
//  ch          - in: the character the caller read last (only tested with
//                isWhiteSpace()); out: the delimiter found, on S_OK.
//  fWhitespace - set to false if any non-whitespace character was seen,
//                including the incoming *ch.  Never set to true here.
//
// Returns S_OK when a delimiter was consumed, XML_E_BADCHARDATA on 0xfffe
// or 0xffff, and E_PENDING when the buffered data ran out first.
//
// Line counting matches nextChar(): _chLast is updated for every character
// so that only an LF immediately following a CR is folded into the same
// line break.
HRESULT BufferedStream::scanPCData(
    /* [out] */ WCHAR* ch,
    /* [out] */ bool* fWhitespace)
{
    bool foundNonWhiteSpace = !isWhiteSpace(*ch);

    // Skip the data until we find one of the delimiters.
    while (_lCurrent < _lUsed)
    {
        const WCHAR result = _pchBuffer[_lCurrent++];

        const bool fAfterCR = (_chLast == 0xd);
        _chLast = result;

        switch (result)
        {
        case ']':   // xiaoyu : the specified chars can be changed for our own purpose
        case '>':
        case '<':
        case '&':
        case '\'':  // so this can be used to scan attribute values also.
        case '"':   // so this can be used to scan attribute values also.
            *ch = result;
            if (foundNonWhiteSpace)
                *fWhitespace = false;
            return S_OK;

        case 0xa:
        case 0xd:
            // An LF directly after a CR belongs to the same line break.
            if (result == 0xd || !fAfterCR)
                _lLine++;
            _lLinepos = _lCurrent;
            _lLastWhiteSpace = _lCurrent;
            break;

        case 0x20:
        case 0x9:
            _lLastWhiteSpace = _lCurrent;
            break;

        case 0xfffe:
        case 0xffff:
            ::FusionpDbgPrintEx(
                FUSION_DBG_LEVEL_ERROR,
                "SXS.DLL: XML Parser found either 0xfffe or 0xffff\n");

            return XML_E_BADCHARDATA;

        default:
            foundNonWhiteSpace = true;
            break;
        }
    }

    // Ran out of buffered data: report what we know and return E_PENDING.
    if (foundNonWhiteSpace)
        *fWhitespace = false;
    return E_PENDING;
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
long BufferedStream::getLine()
|
||
|
{
|
||
|
return _lMarkedline;
|
||
|
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
long BufferedStream::getLinePos()
|
||
|
{
|
||
|
// _lMarkedlinepos is the position of the beginning of the marked line
|
||
|
// relative to the beginning of the buffer, and _lMark is the
|
||
|
// position of the marked token relative to the beginning of the
|
||
|
// buffer, So the position of the marked token relative to the
|
||
|
// current line is the difference between the two.
|
||
|
// We also return a 1-based position so that the start of the
|
||
|
// line = column 1. This is consistent with the line numbers
|
||
|
// which are also 1-based.
|
||
|
return (_lMarkedlinepos > _lMark+1) ? 0 : _lMark+1-_lMarkedlinepos;
|
||
|
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
long BufferedStream::getInputPos()
|
||
|
{
|
||
|
return _lStartAt+_lMark;
|
||
|
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Returns a pointer to the text of the marked line inside the buffer.
//
//  len      - receives the number of characters up to, but not including,
//             the first CR, LF or null terminator.
//  startpos - receives the 1-based position of the marked token relative
//             to the start of the returned text (same value as
//             getLinePos()).
//
// Returns NULL, with *len and *startpos both set to 0, when no buffer has
// been allocated yet.  (*startpos used to be left unwritten in that case.)
// The returned pointer aliases the internal buffer and is not a copy.
WCHAR* BufferedStream::getLineBuf(ULONG* len, ULONG* startpos)
{
    *len = 0;
    *startpos = 0;
    if (_pchBuffer == NULL)
        return NULL;

    WCHAR* pLine = &_pchBuffer[_lMarkedlinepos];

    // The internal _pchBuffer is guaranteed to be null terminated, so the
    // scan stops at the end of the buffered data at the latest.
    ULONG cch = 0;
    while (pLine[cch] != 0 && pLine[cch] != L'\n' && pLine[cch] != L'\r')
    {
        cch++;
    }
    *len = cch;

    // Also return the token position relative to the start of the
    // returned text.
    *startpos = (_lMarkedlinepos > _lMark+1) ? 0 : _lMark+1-_lMarkedlinepos;
    return pLine;
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Requests that the input be decoded with a different character set from
// the current read position onwards.
//
//  charset - name of the encoding, passed to Encoding::newEncoding().
//  len     - length argument passed along with charset.
//
// While the buffer is frozen the new encoding is only remembered in
// _pPendingEncoding; UnFreeze() applies it later.  Otherwise the switch is
// carried out immediately by doSwitchEncoding().
//
// Returns E_FAIL when no input stream is attached, E_OUTOFMEMORY when
// Encoding::newEncoding() returns NULL, S_OK when the switch was deferred,
// and otherwise the result of doSwitchEncoding().
HRESULT BufferedStream::switchEncoding(const WCHAR * charset, ULONG len)
{
    if (!_pStmInput)
    {
        return E_FAIL;
    }

    // A switch requested earlier while frozen may still be pending.  It is
    // superseded by this request, so free it instead of leaking it.
    delete _pPendingEncoding;

    _pPendingEncoding = Encoding::newEncoding(charset, len);
    if (_pPendingEncoding == NULL)
    {
        return E_OUTOFMEMORY;
    }

    if (_fFrozen)
    {
        // Deferred: UnFreeze() will apply it.
        return S_OK;
    }

    return doSwitchEncoding();
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Applies the pending encoding switch at the current read position.
//
// The pending encoding pointer is cleared and handed to
// EncodingStream::switchEncodingAt() together with the absolute input
// position (_lStartAt + _lCurrent).
// NOTE(review): whether switchEncodingAt() takes ownership of the encoding
// on every path cannot be seen from this file - confirm.
//
// Returns S_OK when the switch succeeded (S_FALSE from the stream is mapped
// to S_OK), XML_E_INVALIDENCODING when the stream reports E_INVALIDARG,
// XML_E_INVALIDSWITCH for any other failure, and any other success code
// unchanged.
HRESULT BufferedStream::doSwitchEncoding()
{
    Encoding* encoding = _pPendingEncoding;
    _pPendingEncoding = NULL;

    HRESULT hr = _pStmInput->switchEncodingAt(encoding, _lStartAt + _lCurrent);

    if (hr == S_FALSE)
    {
        // Need to re-read to force a re-decode in the new encoding.  In
        // other words we have to forget that we read past this position
        // already, so that the next call to nextChar() calls fillBuffer()
        // again.
        //
        // _lUsed is an index into _pchBuffer, so it must be truncated to
        // the buffer-relative _lCurrent.  It used to be set to the absolute
        // position _lStartAt + _lCurrent, which is only the same value
        // while nothing has been shifted out of the buffer.
        _lUsed = _lCurrent;
        return S_OK;
    }

    if (FAILED(hr))
    {
        return (hr == E_INVALIDARG) ? XML_E_INVALIDENCODING : XML_E_INVALIDSWITCH;
    }

    return hr;
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Returns a pointer to a contiguous block of text accumulated
|
||
|
// from the last time Mark() was called up to but not including
|
||
|
// the last character read. (This allows a parser to have a
|
||
|
// lookahead character that is not included in the token).
|
||
|
HRESULT
|
||
|
BufferedStream::getToken(const WCHAR**p, long* len)
|
||
|
{
|
||
|
if (_pchBuffer == NULL)
|
||
|
return E_FAIL;
|
||
|
|
||
|
if (_lCurrent != _lCurrent2)
|
||
|
{
|
||
|
// need to fix up buffer since it is no
|
||
|
// out of sync since we've been compressing
|
||
|
// whitespace.
|
||
|
|
||
|
}
|
||
|
*p = &_pchBuffer[_lMark];
|
||
|
*len = getTokenLength();
|
||
|
return S_OK;
|
||
|
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
void
|
||
|
BufferedStream::Lock()
|
||
|
{
|
||
|
// We allow nested locking - where the outer lock wins - unlock only
|
||
|
// really unlocks when the outer lock is unlocked.
|
||
|
if (++_lLockCount == 1)
|
||
|
{
|
||
|
_lLockedPos = _lMark;
|
||
|
_lLockedLine = _lMarkedline;
|
||
|
_lLockedLinePos = _lMarkedlinepos;
|
||
|
}
|
||
|
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
void
|
||
|
BufferedStream::UnLock()
|
||
|
{
|
||
|
if (--_lLockCount == 0)
|
||
|
{
|
||
|
_lMark = _lLockedPos;
|
||
|
_lMarkedline = _lLockedLine;
|
||
|
_lMarkedlinepos = _lLockedLinePos;
|
||
|
_lLockedPos = -1;
|
||
|
}
|
||
|
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Freezes the buffer so that prepareForInput() no longer shifts data down.
//
// Since the buffer is frozen a lot (any time we are inside a tag), data has
// to be shifted down more frequently to guarantee there is space when it
// is needed; otherwise the buffer would tend to just keep growing.  So
// prepareForInput() is run first whenever the read cursor has gone past
// the midpoint of the buffer.
//
// Returns S_OK, or whatever checkhr2 propagates from prepareForInput().
HRESULT
BufferedStream::Freeze()
{
    HRESULT hr;     // written by the checkhr2 macro

    const bool fPastMidPoint = (_lCurrent > _lMidPoint);
    if (fPastMidPoint)
    {
        checkhr2( prepareForInput() );
    }

    _fFrozen = true;
    return S_OK;
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Unfreezes the buffer and applies an encoding switch that was requested
// while it was frozen, if there is one.
//
// Returns S_OK when nothing was pending, otherwise the result of
// doSwitchEncoding().
HRESULT
BufferedStream::UnFreeze()
{
    _fFrozen = false;

    if (_pPendingEncoding == NULL)
    {
        return S_OK;
    }
    return doSwitchEncoding();
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Makes room in the buffer and reads more decoded characters into it from
// the input stream.
//
// Returns S_OK on success (also at EOF, which is reported through _fEof),
// E_PENDING when no input stream has been attached yet, the failure code
// of the stream's Read(), or whatever checkhr2 propagates from
// prepareForInput().
HRESULT
BufferedStream::fillBuffer()
{
    HRESULT hr;     // written by the checkhr2 macro

    checkhr2( prepareForInput() );

    if (!_pStmInput)
    {
        // SetInput or AppendData hasn't been called yet.
        return E_PENDING;
    }

    // One slot is reserved for the null terminator.
    const long cchFree = _lSize - _lUsed - 1;

    // Get more bytes.
    ULONG cbRead = 0;
    HRESULT hrRead = _pStmInput->Read(&_pchBuffer[_lUsed], cchFree*sizeof(WCHAR), &cbRead);

    // The stream must return unicode characters.  Account for whatever
    // arrived and re-terminate the buffer before looking at the result.
    _lUsed += cbRead/sizeof(WCHAR);
    _pchBuffer[_lUsed] = 0;

    if (FAILED(hrRead))
    {
        return hrRead;
    }

    if (cbRead == 0)
    {
        _fEof = true;
        // Step the cursors past the end so that getToken() returns the
        // last character in the file.
        _lCurrent++;
        _lCurrent2++;
    }

    return S_OK;
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
// Gets the buffer ready to receive at least BLOCK_SIZE more characters.
//
// Step 1 (skipped while frozen): move the section of the buffer that is
// still needed - from getNewStart() up to _lUsed - down to the beginning
// of the buffer and rebase every buffer offset accordingly.
// Step 2: if fewer than BLOCK_SIZE free slots remain (one slot is reserved
// for the null terminator), grow the buffer: double it, or fall back to
// +BLOCK_SIZE if the doubling allocation fails.
//
// Returns S_OK, or E_OUTOFMEMORY when no larger buffer could be allocated
// and no free space is left at all.  When allocation fails but some space
// remains, the existing buffer is kept and S_OK is returned.
//
// BUGBUG - if this code is changed, BufferedStream::nextChar has to be
// updated as well so that they stay in sync.  Otherwise the buffer might be
// re-allocated without an E_DATA_REALLOCATE notification having been sent,
// which would be very bad (causes GPFs in the parser).
HRESULT
BufferedStream::prepareForInput()
{
    if (! _fFrozen)     // can't shift data if the buffer is frozen.
    {
        const long newstart = getNewStart();

        if (newstart > 0)
        {
            WCHAR* src = &_pchBuffer[newstart];
            _lUsed -= newstart;
            _lStartAt += newstart;
            ::memmove(_pchBuffer, src, _lUsed*sizeof(WCHAR));

            // Keep the buffer null terminated after the shift.  Freeze()
            // calls this function without a following read that would
            // otherwise re-terminate it.  (_lUsed < _lSize here because
            // newstart > 0.)
            _pchBuffer[_lUsed] = 0;

            // Rebase every offset into the buffer.  Line starts that were
            // shifted out are clamped to 0.
            _lCurrent -= newstart;
            _lCurrent2 -= newstart;
            _lLastWhiteSpace -= newstart;
            _lLinepos = (_lLinepos > newstart) ? _lLinepos-newstart : 0;
            // This must test _lMarkedlinepos itself.  It used to test
            // _lLinepos, which has already been rebased on the line above,
            // so the marked line start could be zeroed or driven negative.
            _lMarkedlinepos = (_lMarkedlinepos > newstart) ? _lMarkedlinepos-newstart : 0;
            _lMark -= newstart;
            _lLockedLinePos = (_lLockedLinePos > newstart) ? _lLockedLinePos-newstart : 0;
            _lLockedPos -= newstart;
        }
    }

    // Make sure we have a reasonable amount of space left in _pchBuffer.
    long space = _lSize - _lUsed;
    if (space > 0) space--;     // reserve 1 for NULL termination

    if (_pchBuffer != NULL && space >= BLOCK_SIZE)
    {
        return S_OK;            // enough room already
    }

    // Double the size of the buffer.
    long newsize = (_lSize == 0) ? BLOCK_SIZE : (_lSize*2);

    WCHAR* newbuf = NEW (WCHAR[newsize]);
    if (newbuf == NULL)
    {
        // Try a more conservative allocation.
        newsize = _lSize + BLOCK_SIZE;
        newbuf = NEW (WCHAR[newsize]);
    }

    if (newbuf == NULL)
    {
        // Out of memory: only fatal if there is no room left at all.
        return (space == 0) ? E_OUTOFMEMORY : S_OK;
    }

    if (_pchBuffer != NULL)
    {
        // Copy old characters to the new buffer.
        ::memcpy(newbuf, _pchBuffer, _lUsed*sizeof(WCHAR));
        delete [] _pchBuffer;
    }
    newbuf[_lUsed] = 0;         // make sure it's null terminated.
    _pchBuffer = newbuf;
    _lSize = newsize;
    _lMidPoint = newsize / 2;

    return S_OK;
}
|
||
|
/////////////////////////////////////////////////////////////////////////////
|
||
|
long
|
||
|
BufferedStream::getNewStart()
|
||
|
{
|
||
|
long newstart = 0;
|
||
|
|
||
|
// Unless the buffer is frozen, in which case we just reallocate and
|
||
|
// do no shifting of data.
|
||
|
if (_lLockedPos > 0)
|
||
|
{
|
||
|
// and try and preserve the beginning of the marked line if we can
|
||
|
if (_lLockedLinePos < _lLockedPos &&
|
||
|
_lLockedPos - _lLockedLinePos < MAX_LINE_BUFFER)
|
||
|
{
|
||
|
newstart = _lLockedLinePos;
|
||
|
}
|
||
|
}
|
||
|
else if (_lMark > 0)
|
||
|
{
|
||
|
// and try and preserve the beginning of the marked line if we can
|
||
|
newstart = _lMark;
|
||
|
if (_lMarkedlinepos < _lMark &&
|
||
|
_lMark - _lMarkedlinepos < MAX_LINE_BUFFER) // watch out for long lines
|
||
|
{
|
||
|
newstart = _lMarkedlinepos;
|
||
|
}
|
||
|
}
|
||
|
return newstart;
|
||
|
}
|