windows-nt/Source/XPSP1/NT/windows/richedit/re41/text.cpp
2020-09-26 16:20:57 +08:00

2914 lines
74 KiB
C++

/*
* @doc INTERNAL
*
* @module TEXT.C -- CTxtPtr implementation |
*
* Authors: <nl>
* Original RichEdit code: David R. Fulmer <nl>
* Christian Fortini <nl>
* Murray Sargent <nl>
*
* History: <nl>
* 6/25/95 alexgo cleanup and reorganization (use run pointers now)
*
* Copyright (c) 1995-2000, Microsoft Corporation. All rights reserved.
*/
#include "_common.h"
#include "_text.h"
#include "_edit.h"
#include "_antievt.h"
#include "_clasfyc.h"
#include "_txtbrk.h"
ASSERTDATA
//-----------------------------Internal functions--------------------------------
// Text Block management
static void TxDivideInsertion(LONG cch, LONG ichBlock, LONG cchAfter,
LONG *pcchFirst, LONG *pcchLast);
/*
 *	IsWhiteSpace(ch)
 *
 *	@func
 *		Classify ch as white space for sentence start/end detection.
 *		A char qualifies if it is a blank, lies in the closed range
 *		CELL..CR, or equals PS once its low bit is set.
 *
 *	@rdesc
 *		Nonzero if ch is whitespace
 */
BOOL IsWhiteSpace(unsigned ch)
{
	const BOOL fBlank	= ch == ' ';
	const BOOL fControl	= IN_RANGE(CELL, ch, CR);
	const BOOL fUniSep	= (ch | 1) == PS;		// Ignore low bit when matching PS

	return fBlank || fControl || fUniSep;
}
/*
 *	IsSentenceTerminator(ch)
 *
 *	@func
 *		Test whether ch is one of the standard sentence delimiters:
 *		question mark, period, or exclamation point.
 *
 *	@rdesc
 *		Nonzero if ch is '?', '.', or '!'; zero otherwise
 */
BOOL IsSentenceTerminator(unsigned ch)
{
	switch(ch)
	{
	case '?':
	case '.':
	case '!':
		return 1;						// Std sentence delimiter
	}
	return 0;
}
// =========================== Invariant stuff ==================================================
#define DEBUG_CLASSNAME CTxtPtr
#include "_invar.h"
// =============================== CTxtPtr ======================================================
#ifdef DEBUG
/*
 *	CTxtPtr::Invariant
 *
 *	@mfunc	Debug-only consistency check of this text pointer
 *
 *	@rdesc
 *		Always TRUE; problems are reported through Assert
 */
BOOL CTxtPtr::Invariant() const
{
	static LONG numTests = 0;			// Running count of calls
	++numTests;

	Assert(_cp >= 0);					// _cp can never be negative
	Update_pchCp();						// Refresh debug text pointer
	CRunPtrBase::Invariant();

	if(!IsValid())
	{
		Assert(_ich == 0);				// Invalid ptr must sit at offset 0
		return TRUE;
	}

	// <= rather than < because this ptr may be an insertion point at
	// the *end* of the existing text
	Assert(_cp <= GetTextLength());

	// Story length must agree with what the block array reports
	Assert(GetTextLength() == ((CTxtArray *)_pRuns)->Invariant());

	// Cached _cp must agree with the cp computed from the run ptr
	Assert(_cp == CRunPtrBase::CalculateCp());

	return TRUE;
}
/*
* CTxtPtr::Update_pchCp ()
*
* @mfunc
* Define _pchCp to be ptr to text at _cp
*/
void CTxtPtr::Update_pchCp() const
{
LONG cchValid;
*(LONG_PTR *)&_pchCp = (LONG_PTR)GetPch(cchValid);
if(!cchValid)
*(LONG_PTR *)&_pchCp = (LONG_PTR)GetPchReverse(cchValid);
}
/*
* CTxtPtr::MoveGapToEndOfBlock ()
*
* @mfunc
* Function to move buffer gap to current block end to aid in debugging
*/
void CTxtPtr::MoveGapToEndOfBlock () const
{
CTxtBlk *ptb = GetRun(0);
ptb->MoveGap(ptb->_cch); // Move gaps to end of cur block
Update_pchCp();
}
#endif // DEBUG
/*
 *	CTxtPtr::CTxtPtr(ped, cp)
 *
 *	@mfunc	Constructor: attach to ped's text story and bind to cp
 */
CTxtPtr::CTxtPtr (
	CTxtEdit *ped,		//@parm Ptr to CTxtEdit instance
	LONG	  cp)		//@parm cp to set the pointer to
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::CTxtPtr");

	_ped = ped;
	_cp	 = 0;
	SetRunArray((CRunArray *) &ped->GetTxtStory()->_TxtArray);

	if(!IsValid())						// No usable runs: stay at cp 0
		return;

	_cp = BindToCp(cp);					// Use whatever cp binding yields
}
/*
 *	CTxtPtr::CTxtPtr(&tp)
 *
 *	@mfunc	Copy constructor: duplicates tp by way of assignment
 */
CTxtPtr::CTxtPtr (
	const CTxtPtr &tp)	//@parm Text ptr to clone
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::CTxtPtr");

	// All state comes over through the assignment operator
	operator=(tp);
}
/*
 *	CTxtPtr::GetTextLength()
 *
 *	@mfunc
 *		Return count of characters in the story pointed to by this
 *		text ptr. Includes the story's final CR in the count
 *
 *	@rdesc
 *		cch for the story pointed to by this text ptr
 *
 *	@devnote
 *		A text ptr with _ped = NULL (a zombie) reports a length of 0.
 */
LONG CTxtPtr::GetTextLength() const
{
	if(!_ped)							// Zombie: no story to measure
		return 0;

	return ((CTxtArray *)_pRuns)->_cchText;
}
/*
 *	CTxtPtr::GetChar()
 *
 *	@mfunc
 *		Return character at this text pointer, or 0 if no text is
 *		available there (e.g., text pointer is at end of text)
 *
 *	@rdesc
 *		Character at this text ptr
 */
WCHAR CTxtPtr::GetChar()
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetChar");

	LONG		 cchValid;
	const WCHAR *pch = GetPch(cchValid);

	if(!pch)							// Nothing to read here
		return 0;

	return *pch;
}
/*
 *	CTxtPtr::GetPrevChar()
 *
 *	@mfunc
 *		Return character just before this text pointer, or 0 if no text
 *		precedes it (e.g., text pointer is at beginning of text)
 *
 *	@rdesc
 *		Character just before this text ptr
 */
WCHAR CTxtPtr::GetPrevChar()
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetPrevChar");

	LONG		 cchValid;
	const WCHAR *pch = GetPchReverse(cchValid);

	if(!pch)							// Nothing behind this position
		return 0;

	return pch[-1];						// Reverse ptr sits just past prev char
}
/*
 *	CTxtPtr::GetPch(&cchValid)
 *
 *	@mfunc
 *		Return a character pointer to the text at this text pointer along
 *		with the count of contiguous chars that can be read through it
 *
 *	@rdesc
 *		Pointer to an array of characters. May be NULL (invalid text ptr
 *		or end of text), in which case cchValid is 0. If non-NULL,
 *		cchValid is expected to be at least 1 (asserted).
 */
const WCHAR * CTxtPtr::GetPch(
	LONG & cchValid) const		//@parm Count of chars for which ptr is valid
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetPch");

	cchValid = 0;							// Nothing valid until proven otherwise

	CTxtBlk *ptb = IsValid() ? GetRun(0) : NULL;
	if(!ptb)
		return NULL;

	LONG ich = _ich;						// "Active" offset within ptb

	if(ich == ptb->_cch)					// Sitting on the end of this block
	{
		if(_iRun >= Count() - 1)			// It's the last block, i.e., end
			return NULL;					//  of text: nothing to return

		ptb = GetRun(1);					// Otherwise read from the start of
		ich = 0;							//  the next text block
	}

	AssertSz(CbOfCch(ich) <= ptb->_cbBlock,
		"CTxtPtr::GetPch(): _ich bigger than block");

	WCHAR *pchBase = ptb->_pch + ich;

	// The gap may sit anywhere inside the block. Text before the gap is
	// readable up to the gap; text at or after the gap is stored beyond
	// it, so the pointer has to be advanced by the gap size and the rest
	// of the block (_cch - ich) is readable.
	if(CbOfCch(ich) < ptb->_ibGap)
	{
		cchValid = CchOfCb(ptb->_ibGap) - ich;
	}
	else
	{
		pchBase += CchOfCb(ptb->_cbBlock) - ptb->_cch;
		cchValid = ptb->_cch - ich;
	}

	AssertSz(cchValid > 0 && GetCp() + cchValid <= GetTextLength(),
		"CTxtPtr::GetPch: illegal cchValid");

	return pchBase;
}
/*
 *	CTxtPtr::GetPchReverse(&cchValidReverse, pcchValid)
 *
 *	@mfunc
 *		Return a character pointer to the text at this text pointer
 *		adjusted so that there are some characters valid *behind* the
 *		pointer.
 *
 *	@rdesc
 *		Pointer to an array of characters. May be NULL (invalid text ptr
 *		or start of text), in which case cchValidReverse and *pcchValid
 *		(if requested) are 0. If non-NULL, cchValidReverse is expected to
 *		be at least 1 (asserted) and *pcchValid receives the count of
 *		chars that can be read forward from the returned pointer (may
 *		be 0).
 */
const WCHAR * CTxtPtr::GetPchReverse(
	LONG & cchValidReverse,			//@parm length for reverse
	LONG * pcchValid) const			//@parm length forward (may be NULL)
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetPchReverse");

	LONG		cchTemp;
	LONG		ich = _ich;
	WCHAR *		pchBase;
	CTxtBlk *	ptb = IsValid() ? GetRun(0) : NULL;

	cchValidReverse = 0;					// Default no valid chars in run
	if(pcchValid)							// Don't leave the optional out
		*pcchValid = 0;						//  parm undefined on NULL returns

	if(!ptb)
		return NULL;

	// If we're at the edge of a run, grab the previous run or
	// stay at the current run.
	if(!_ich)
	{
		if(_iRun)
		{
			ptb = GetRun(-1);				// Go to previous text block
			ich = ptb->_cch;
		}
		else								// At start of text:
			return NULL;					//  just return NULL
	}

	AssertSz(CbOfCch(ich) <= ptb->_cbBlock,
		"CTxtPtr::GetPchReverse(): _ich bigger than block");

	pchBase = ptb->_pch + ich;

	// Check to see if we need to skip over gap. Recall that
	// the gap may come anywhere in the middle of a block,
	// so if the current ich (note, no underscore, we want
	// the active ich) is at least one char past the gap, then recompute
	// pchBase by adding the size of the gap (so that it's after
	// the gap). This differs from GetPch(), which works forward and
	// wants pchBase to include the gap size if ich is at the gap, let
	// alone one or more chars past it.
	//
	// Also figure out the count of valid characters. It's
	// either the count of characters from the beginning of the
	// text block, i.e. ich, or the count of characters from the
	// end of the buffer gap.
	cchValidReverse = ich;					// Default for ich <= gap offset
	cchTemp = ich - CchOfCb(ptb->_ibGap);	// Calculate displacement
	if(cchTemp > 0)							// Positive: pchBase is after gap
	{
		cchValidReverse = cchTemp;
		pchBase += CchOfCb(ptb->_cbBlock) - ptb->_cch;	// Add in gap size
	}

	if(pcchValid)							// If client needs forward length
	{
		// After the gap, the rest of the block (_cch - ich) is readable.
		// At or before the gap, only the chars up to the gap are, i.e.,
		// gap offset - ich = -cchTemp.
		*pcchValid = cchTemp > 0 ? ptb->_cch - ich : -cchTemp;
	}

	AssertSz(cchValidReverse > 0 && GetCp() - cchValidReverse >= 0,
		"CTxtPtr::GetPchReverse: illegal cchValidReverse");

	return pchBase;
}
/*
 *	CTxtPtr::GetCharFlagsInRange(cch, iCharRepDefault)
 *
 *	@mfunc
 *		Return the OR of the CharFlags of the chars starting at this text
 *		pointer. cch + 1 chars are fetched, but never more than 10.
 *
 *	@rdesc
 *		Combined CharFlags for the chars examined
 */
QWORD CTxtPtr::GetCharFlagsInRange(
	LONG cch,				//@parm Count of chars in range
	BYTE iCharRepDefault)	//@parm Passed through to GetCharFlags()
{
	WCHAR	szch[10];					// Local copy of text to classify
	QWORD	qwAll = 0;					// Accumulated flags

	cch = min(cch + 1, 10);				// Never overrun szch
	LONG	cchLeft = GetText(cch, szch);
	WCHAR *	pchCur	= szch;

	while(cchLeft > 0)
	{
		const QWORD qwCh = GetCharFlags(pchCur, cchLeft, iCharRepDefault);

		// A surrogate pair occupies two WCHARs: step over both
		const LONG cchStep = (qwCh & FSURROGATE) ? 2 : 1;

		qwAll	|= qwCh;
		pchCur	+= cchStep;
		cchLeft -= cchStep;
	}
	return qwAll;
}
/*
 *	CTxtPtr::BindToCp(cp)
 *
 *	@mfunc
 *		Bind this text ptr to cp by way of CRunPtrBase::BindToCp() and
 *		cache the resulting cp in _cp
 *
 *	@rdesc
 *		_cp actually set
 *
 *	@comm
 *		This method overrides CRunPtrBase::BindToCp to keep _cp up to date
 *		correctly.
 *
 *	@devnote
 *		Do *not* call this method when high performance is needed; use
 *		Move() instead, which moves from 0 or from the cached
 *		_cp, depending on which is closer.
 */
LONG CTxtPtr::BindToCp(
	LONG cp)			//@parm char position to bind to
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::BindToCp");

	const LONG cchText = GetTextLength();

	_cp = CRunPtrBase::BindToCp(cp, cchText);

	// This routine is used to repair text ptrs, so the invariant is only
	// tested after the binding is done, not on entry.
	_TEST_INVARIANT_

	return _cp;
}
/*
 *	CTxtPtr::SetCp(cp)
 *
 *	@mfunc
 *		Set cp by handing the distance from the current _cp to Move(),
 *		which decides whether to advance from the current position or
 *		to rebind from 0
 *
 *	@rdesc
 *		cp actually set to
 */
LONG CTxtPtr::SetCp(
	LONG cp)			//@parm char position to set to
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::SetCp");

	const LONG cchDelta = cp - _cp;		// Signed distance to target

	Move(cchDelta);
	return _cp;
}
/*
 *	CTxtPtr::Move(cch)
 *
 *	@mfunc
 *		Move cp by cch characters
 *
 *	@rdesc
 *		Actual number of characters moved by (0 if this ptr isn't valid)
 *
 *	@comm
 *		We override CRunPtrBase::Move so that the cached _cp value
 *		can be correctly updated and so that the move can be made
 *		from the cached _cp or from 0, depending on which is closer.
 *
 *	@devnote
 *		It's also easy to bind at the end of the story. So an improved
 *		optimization would bind there if 2*(_cp + cch) > _cp + text length.
 */
LONG CTxtPtr::Move(
	LONG cch)			// @parm count of chars to move by
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::Move");

	if(!IsValid())							// No runs yet, so there is
		return 0;							//  nowhere to go

	const LONG cpStart	= _cp;				// _cp on entry
	const LONG cpTarget = cpStart + cch;	// Requested cp (may be < 0)

	if(cpTarget >= cpStart/2)				// Target is nearer to cached cp:
		_cp += CRunPtrBase::Move(cch);		//  walk from where we are
	else									// Target is nearer to 0: rebind,
		_cp = CRunPtrBase::BindToCp(max(cpTarget, 0));	// clamping at 0

	// NB! the invariant check needs to come at the end; we may be
	// moving 'this' text pointer in order to make it valid again
	// (for the floating range mechanism).
	_TEST_INVARIANT_

	return _cp - cpStart;					// cch this CTxtPtr moved
}
/*
 *	CTxtPtr::GetText(cch, pch)
 *
 *	@mfunc
 *		Get a range of cch characters starting at this text ptr. A literal
 *		copy is made, i.e., with no CR -> CRLF and WCH_EMBEDDING -> ' '
 *		translations. For these translations, see CTxtPtr::GetPlainText()
 *
 *	@rdesc
 *		Count of characters actually copied. This is less than cch if the
 *		end of the story is reached first, and 0 if cch <= 0.
 *
 *	@comm
 *		Doesn't change this text ptr. The buffer isn't NULL terminated.
 */
LONG CTxtPtr::GetText(
	LONG	cch,			//@parm Count of characters to get
	WCHAR *	pch)			//@parm Buffer to copy the text into
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetText");

	LONG		 cchSave = cch;
	LONG		 cchValid;
	const WCHAR *pchRead;
	CTxtPtr		 tp(*this);				// Walk a clone so *this stays put

	_TEST_INVARIANT_

	// Use tp to read valid blocks of text until all the requested
	// text is read or until the end of story is reached. Testing
	// cch > 0 (rather than cch != 0) keeps a negative request from
	// turning into a huge CopyMemory() length.
	while(cch > 0)
	{
		pchRead = tp.GetPch(cchValid);
		if(!pchRead)					// No more text
			break;

		cchValid = min(cchValid, cch);
		CopyMemory(pch, pchRead, cchValid*sizeof(WCHAR));
		pch += cchValid;
		cch -= cchValid;
		tp.Move(cchValid);
	}
	return cchSave > 0 ? cchSave - cch : 0;
}
#ifndef NOCOMPLEXSCRIPTS
/*
 *	OverRideNeutralChar(ch)
 *
 *	@mfunc
 *		Helper for overriding BiDi neutral character classification.
 *		Option is used in Access Expression Builder. Selected ASCII
 *		punctuation is replaced by 'a', CELL is replaced by CR, and
 *		everything else is passed through.
 *
 *	@rdesc
 *		Modified character or unmodified input character
 */
WCHAR OverRideNeutralChar(WCHAR ch)
{
	if(ch < '!')						// Below the punctuation of interest:
	{									//  only CELL gets remapped
		if(ch == CELL)
			return CR;
		return ch;
	}

	if(ch > '}')						// Above the punctuation of interest
		return ch;

	if(IN_RANGE('!', ch, '>'))
	{
		// Bit (ch - ' ') of the mask is set for !"#&'()*+,-./:;<=>
		return ((0x00000001 << (ch - TEXT(' '))) & 0x7C00FFCE) ? 'a' : ch;
	}

	// True for [\]^{}
	if(ch == '{' || ch == '}' || IN_RANGE('[', ch, '^'))
		return 'a';

	return ch;
}
/*
 *	CTxtPtr::GetTextForUsp(cch, pch, fNeutralOverride)
 *
 *	@mfunc
 *		Get a range of cch characters starting at this text ptr. A literal
 *		copy is made, with translation to fool Uniscribe classification:
 *		without fNeutralOverride, '#' and '$' become '@' and CELL becomes
 *		CR; with it, every char is passed through OverRideNeutralChar().
 *
 *	@rdesc
 *		Count of characters actually copied. This is less than cch if the
 *		end of the story is reached first, and 0 if cch <= 0.
 *
 *	@comm
 *		Doesn't change this text ptr
 */
LONG CTxtPtr::GetTextForUsp(
	LONG	cch,				//@parm Count of characters to get
	WCHAR *	pch,				//@parm Buffer to copy the text into
	BOOL	fNeutralOverride)	//@parm Neutral override option
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetTextForUsp");

	LONG		 cchSave = cch;
	LONG		 cchValid;
	const WCHAR *pchRead;
	CTxtPtr		 tp(*this);				// Walk a clone so *this stays put
	int			 i;
	WCHAR		 xltchar;

	_TEST_INVARIANT_

	// Use tp to read valid blocks of text until all the requested
	// text is read or until the end of story is reached. Testing
	// cch > 0 (rather than cch != 0) keeps a negative request from
	// walking pch backward and reporting a negative count.
	while(cch > 0)
	{
		pchRead = tp.GetPch(cchValid);
		if(!pchRead)					// No more text
			break;

		cchValid = min(cchValid, cch);

		if(!fNeutralOverride)
		{
			for(i = 0; i < cchValid; i++)
			{
				xltchar = pchRead[i];
				if(xltchar <= '$')
				{
					if(xltchar >= '#')	// '#' and '$' -> '@'
						xltchar = '@';
					if(xltchar == CELL)	// Cell mark -> CR
						xltchar = CR;
				}
				pch[i] = xltchar;
			}
		}
		else
		{
			for(i = 0; i < cchValid; i++)
				pch[i] = OverRideNeutralChar(pchRead[i]);
		}

		pch += cchValid;
		cch -= cchValid;
		tp.Move(cchValid);
	}
	return cchSave > 0 ? cchSave - cch : 0;
}
#endif
/*
* CTxtPtr::GetPlainText(cchBuff, pch, cpMost, fTextize, fUseCRLF)
*
* @mfunc
* Copy up to cchBuff characters or up to cpMost, whichever comes
* first, translating lone CRs into CRLFs if fUseCRLF (and not in
* RichEdit 1.0 mode). Move this text ptr just past the last character
* processed. If fTextize, copy up to but not including the first
* WCH_EMBEDDING char. Otherwise any char at or above STARTFIELD is
* replaced by a blank. CELL marks are replaced by TABs.
*
* @rdesc
* Count of characters copied
*
* @comm
* An important feature is that this text ptr is moved just past the
* last char copied. In this way, the caller can conveniently read
* out plain text in bufferfuls of up to cch chars, which is useful for
* stream I/O. This routine won't copy the final CR even if cpMost
* is beyond it. The output buffer isn't NULL terminated here.
*/
LONG CTxtPtr::GetPlainText(
LONG cchBuff, //@parm Buffer cch
WCHAR * pch, //@parm Buffer to copy text into
LONG cpMost, //@parm Largest cp to get
BOOL fTextize, //@parm True if break on WCH_EMBEDDING
BOOL fUseCRLF) //@parm If TRUE, CR or LF -> CRLF
{
TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetPlainText");
LONG cch = cchBuff; // Countdown counter
LONG cchValid; // Valid ptr cch
LONG cchT; // Temporary cch
unsigned ch; // Current char
unsigned chPrev = 0; // Previous char
const WCHAR *pchRead; // Backing-store ptr
_TEST_INVARIANT_
AdjustCRLF(); // Be sure we start on an EOP bdy
if(_ped->Get10Mode()) // RE 1.0 delivers EOP chars as
fUseCRLF = FALSE; // they appear in backing store
LONG cchText = _ped->GetAdjustedTextLength();
cpMost = min(cpMost, cchText); // Don't write final CR
if(GetCp() >= cpMost)
return 0;
while(cch > 0) // While room in buffer
{
if(!(pchRead = GetPch(cchValid))) // No more chars available
break; // so we're out of here
cchT = GetCp() + cchValid - cpMost; // > 0: chunk extends past cpMost
if(cchT > 0) // Don't overshoot
{
cchValid -= cchT;
if(cchValid <= 0)
break; // Nothing left before cpMost
}
// Copy the contiguous chunk char by char. cchT counts chars consumed
// from the chunk (this text ptr isn't moved until the chunk is done
// or an EOP needs special handling); cch counts room left in buffer.
for(cchT = 0; cch > 0 && cchT < cchValid; cchT++, cch--, chPrev = ch)
{
ch = *pch++ = *pchRead++; // Copy next char (but don't
if(IN_RANGE(CELL, ch, CR)) // count it yet)
{
if(IsASCIIEOP(ch)) // LF, VT, FF, CR
{
if(!fUseCRLF || ch == FF) // Copied as is
continue;
if (ch == CR && chPrev == ENDFIELD &&
cchValid - cchT > 1 &&
*pchRead == STARTFIELD)
{
*(pch - 1) = ' '; // New table row follows old:
continue; // use only 1 CRLF
}
Move(cchT); // Move up to CR
if(cch < 2) // No room for LF, so don't
goto done; // count CR either
// Bypass EOP w/o worrying about
cchT = AdvanceCRLF(FALSE);// buffer gaps and blocks
if(cchT > 2) // Translate CRCRLF to ' '
{ // Usually copied count exceeds
Assert(cchT == 3); // internal count, but CRCRLFs
*(pch - 1) = ' '; // reduce the relative increase:
} // NB: error for EM_GETTEXTLENGTHEX
else // CRLF or lone CR
{ // Store LF in both cases for
*(pch - 1) = CR; // Be sure it's a CR not a VT,
*pch++ = LF; // Windows. No LF for Mac
cch--; // One less for target buffer
}
cch--; // CR (or ' ') copied
cchT = 0; // Don't Move() more below
break; // Go get new pchRead & cchValid
}
else if(ch == CELL) // Use TAB for cell end markers
*(pch - 1) = TAB;
}
else if(ch >= STARTFIELD)
{ // Object lives here
if(fTextize && ch == WCH_EMBEDDING) // Break on WCH_EMBEDDING
{
Move(cchT); // Move this text ptr up to
goto done; // WCH_EMBEDDING and return
}
*(pch - 1) = ' '; // Replace embedding char by ' '
}
}
Move(cchT); // Bypass chars consumed from chunk
}
done:
return cchBuff - cch; // Count of chars put in buffer
}
/*
* CTxtPtr::AdvanceCRLF(fMulticharAdvance)
*
* @mfunc
* Move text pointer by one character, safely advancing
* over CRLF, CRCRLF, and UTF-16 combinations. A char in the range
* STARTFIELD..ENDFIELD is bypassed together with the char following it.
* If fMulticharAdvance, trailing combining diacritical marks
* (0x300..0x36F) are bypassed as well.
*
* @rdesc
* Number of characters text pointer has been moved by
*/
LONG CTxtPtr::AdvanceCRLF(
BOOL fMulticharAdvance) //@parm If TRUE, advance over combining-mark sequences
{
TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::AdvanceCRLF");
_TEST_INVARIANT_
LONG cp;
LONG cpSave = _cp; // Entry cp for computing return value
WCHAR ch = GetChar(); // Char on entry
WCHAR ch1 = NextChar(); // Advance to and get next char
BOOL fTwoCRs = FALSE;
BOOL fCombiningMark = FALSE;
if(ch == CR)
{
if(ch1 == CR && _cp < GetTextLength())
{
fTwoCRs = TRUE; // Need at least 3 chars to
ch1 = NextChar(); // have CRCRLF at end
}
if(ch1 == LF)
Move(1); // Bypass CRLF
else if(fTwoCRs)
Move(-1); // Only bypass one CR of two
AssertSz(_ped->fUseCRLF() || _cp == cpSave + 1,
"CTxtPtr::AdvanceCRLF: EOP isn't a single char");
}
// Handle Unicode UTF-16 surrogates
if(IN_RANGE(0xD800, ch, 0xDBFF)) // Started on UTF-16 lead word
{
if (IN_RANGE(0xDC00, ch1, 0xDFFF))
Move(1); // Bypass UTF-16 trail word
else
AssertSz(FALSE, "CTxtPtr::AdvanceCRLF: illegal Unicode surrogate combo");
}
if (fMulticharAdvance)
{
while(IN_RANGE(0x300, ch1, 0x36F)) // Bypass combining diacritical marks
{
fCombiningMark = TRUE;
cp = _cp;
ch1 = NextChar();
if (_cp == cp) // Couldn't move (e.g., end of text):
break; // quit to avoid looping forever
}
}
if(IN_RANGE(STARTFIELD, ch, ENDFIELD))
Move(1); // Bypass field type
LONG cch = _cp - cpSave;
// Legal results: no move, a single char, a combining-mark sequence,
// a two-char surrogate or field pair, or a CRLF/CRCRLF ending in LF
AssertSz(!cch || cch == 1 || fCombiningMark ||
cch == 2 && (IN_RANGE(0xD800, ch, 0xDBFF) ||
IN_RANGE(STARTFIELD, ch, ENDFIELD)) ||
(_ped->fUseCRLF() && GetPrevChar() == LF &&
(cch == 2 || cch == 3 && fTwoCRs)),
"CTxtPtr::AdvanceCRLF(): Illegal multichar");
return cch; // # chars bypassed
}
/*
 *	CTxtPtr::NextChar()
 *
 *	@mfunc
 *		Advance this text ptr by one char and return the char it then
 *		points at. Whether the move succeeded isn't checked; the result
 *		is whatever GetChar() reports at the resulting position.
 *
 *	@rdesc
 *		Next char
 */
WCHAR CTxtPtr::NextChar()
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::NextChar");

	_TEST_INVARIANT_

	Move(1);

	const WCHAR chNext = GetChar();
	return chNext;
}
/*
 *	CTxtPtr::PrevChar()
 *
 *	@mfunc
 *		Back this text ptr up by one char and return the char it then
 *		points at
 *
 *	@rdesc
 *		Previous char, or 0 if the text ptr couldn't be moved
 */
WCHAR CTxtPtr::PrevChar()
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::PrevChar");

	_TEST_INVARIANT_

	if(!Move(-1))						// Didn't budge: nothing before us
		return 0;

	return GetChar();
}
/*
* CTxtPtr::BackupCRLF(fMulticharBackup)
*
* @mfunc
* Backup text pointer by one character, safely backing up
* over CRLF, CRCRLF, and UTF-16 combinations. If fMulticharBackup,
* combining diacritical marks (0x300..0x36F) are backed over as well.
* If the char then preceding the text pointer is in the range
* STARTFIELD..ENDFIELD, it is backed over too.
*
* @rdesc
* Number of characters text pointer has been moved by (<= 0)
*/
LONG CTxtPtr::BackupCRLF(
BOOL fMulticharBackup) //@parm If TRUE, backup over combining-mark sequences
{
TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::BackupCRLF");
_TEST_INVARIANT_
LONG cpSave = _cp; // Entry cp for computing return value
WCHAR ch = PrevChar(); // Moves to and get previous char
if(fMulticharBackup)
{ // Bypass combining diacritical marks
while(IN_RANGE(0x300, ch, 0x36F)) // PrevChar() returns 0 when it
ch = PrevChar(); // can't move, which ends loop
}
// Handle Unicode UTF-16 surrogates
if(_cp && IN_RANGE(0xDC00, ch, 0xDFFF)) // Landed on UTF-16 trail word
{
ch = PrevChar();
if (!IN_RANGE(0xD800, ch, 0xDBFF)) // No lead word before it:
{
AssertSz(FALSE, "CTxtPtr::BackupCRLF: illegal Unicode surrogate combo");
ch = NextChar(); // go back to the trail word
}
}
if(ch == LF) // Try to back up 1 char in any case
{
if(_cp && PrevChar() != CR) // If LF, does prev char = CR?
Move(1); // No, leave tp at LF
else if(_cp && !IsAfterTRD(0) &&// At CRLF. If not after TRD
PrevChar() != CR) // and prev char != CR, leave
{ // at CRLF
Move(1);
}
}
else if(IN_RANGE(STARTFIELD, GetPrevChar(), ENDFIELD))
Move(-1); // Bypass field type
AssertSz( _cp == cpSave ||
ch == LF && GetChar() == CR ||
!(ch == LF || fMulticharBackup &&
(IN_RANGE(0x300, ch, 0x36F) ||
IN_RANGE(0xDC00, ch, 0xDFFF) && IN_RANGE(0xD800, GetPrevChar(), 0xDBFF)) ),
"CTxtPtr::BackupCRLF(): Illegal multichar");
return _cp - cpSave; // - # chars this CTxtPtr moved
}
/*
* CTxtPtr::AdjustCRLF(iDir)
*
* @mfunc
* Adjust the position of this text pointer to the beginning of a CRLF
* or CRCRLF combination, if it is in the middle of such a combination.
* Move text pointer to the beginning/end (for iDir neg/pos) of a Unicode
* surrogate pair or a STARTFIELD/ENDFIELD pair if it is in the middle
* of such a pair. Nothing is done at cp 0.
*
* @rdesc
* Number of characters text pointer has been moved by
*
* @future
* Adjust to beginning of sequence containing Unicode combining marks
*/
LONG CTxtPtr::AdjustCRLF(
LONG iDir) //@parm Move forward/backward for iDir = 1/-1, respectively
{
TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::AdjustCpCRLF");
_TEST_INVARIANT_
UINT ch = GetChar();
LONG cpSave = _cp; // Entry cp for computing return value
if(!_cp) // Alignment always correct
return 0; // at cp 0
iDir = iDir < 0 ? -1 : 1; // Normalize: 0 counts as forward
// Handle Unicode UTF-16 surrogates
if(IN_RANGE(0xDC00, ch, 0xDFFF)) // Landed on UTF-16 trail word
{
AssertSz(IN_RANGE(0xD800, GetPrevChar(), 0xDBFF),
"CTxtPtr::AdjustCRLF: illegal Unicode surrogate combo");
return Move(iDir); // Backup to UTF-16 lead word or
} // move forward to next char
UINT chPrev = GetPrevChar();
// Previous char is a field mark other than 0xFFFA: this position is
// in the middle of a field pair, so step off it in direction iDir
if(IN_RANGE(STARTFIELD, chPrev, ENDFIELD) && chPrev != 0xFFFA)
return Move(iDir);
if(!IsASCIIEOP(ch) || IsAfterTRD(0)) // Early out
return 0;
if(ch == LF && chPrev == CR) // Landed on LF preceded by CR:
Move(-1); // move to CR for CRCRLF test
// Leave as adjust-forward only behavior for RE 1.0 compatibility on
// CRCRLF and CRLF
if(GetChar() == CR) // Land on a CR of CRLF or
{ // second CR of CRCRLF?
CTxtPtr tp(*this); // Probe with a clone
if(tp.NextChar() == LF)
{
tp.Move(-2); // First CR of CRCRLF ?
if(tp.GetChar() == CR) // Yes or CRLF is at start of
Move(-1); // story. Try to back up over
} // CR (If at BOS, no effect)
}
return _cp - cpSave;
}
/*
 *	CTxtPtr::IsAtEOP()
 *
 *	@mfunc
 *		Return TRUE iff this text pointer is at an end-of-paragraph mark
 *
 *	@rdesc
 *		TRUE if at EOP
 *
 *	@devnote
 *		End of paragraph marks for RichEdit 1.0 and the MLE can be CRLF
 *		and CRCRLF. For RichEdit 2.0, EOPs can also be CR, VT (0xB - Shift-
 *		Enter), and FF (0xC - page break or form feed). This routine also
 *		accepts CELL and Unicode 0x2028/9.
 */
BOOL CTxtPtr::IsAtEOP()
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::IsAtEOP");

	_TEST_INVARIANT_

	const unsigned ch = GetChar();

	if(!IsASCIIEOP(ch))					// Not LF, VT, FF, or CR: only CELL
		return ch == CELL || (ch | 1) == PS;	// and 0x2028/9 qualify

	// On an ASCII EOP char. It only counts if this position doesn't need
	// adjusting, i.e., it isn't in the middle of a CRLF or CRCRLF. Probe
	// with a clone since AdjustCRLF() may move the text ptr.
	CTxtPtr	   tpProbe(*this);
	const LONG cchAdjust = tpProbe.AdjustCRLF();

	return cchAdjust == 0;
}
/*
 *	CTxtPtr::IsAfterEOP()
 *
 *	@mfunc
 *		Return TRUE iff this text pointer is just after an end-of-paragraph
 *		mark
 *
 *	@rdesc
 *		TRUE iff text ptr follows an EOP mark
 */
BOOL CTxtPtr::IsAfterEOP()
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::IsAfterEOP");

	_TEST_INVARIANT_

	if(IsASCIIEOP(GetChar()))
	{
		// Sitting on an ASCII EOP char: if a clone has to be adjusted,
		// this position is in the middle of a CRLF or CRCRLF, which
		// isn't "after" an EOP
		CTxtPtr tpProbe(*this);
		const LONG cchAdjust = tpProbe.AdjustCRLF();

		if(cchAdjust != 0)
			return FALSE;
	}

	// Otherwise the answer depends only on the preceding char
	const WCHAR chBefore = GetPrevChar();
	return IsEOP(chBefore);
}
/*
 *	CTxtPtr::IsAtTRD(ch)
 *
 *	@mfunc
 *		Return TRUE iff this text pointer is at a table row delimiter,
 *		i.e., at ch immediately followed by CR, where ch is STARTFIELD
 *		or ENDFIELD. If ch = 0, then match both start and end delimiters.
 *
 *	@rdesc
 *		TRUE iff text ptr is at the requested table row delimiter
 */
BOOL CTxtPtr::IsAtTRD(
	WCHAR ch)			//@parm Table Row Delimiter
{
	LONG		 cchValid;
	const WCHAR *pch = GetPch(cchValid);

	if(cchValid < 1)					// No text here
		return FALSE;

	WCHAR chNext;						// Char following the one at this ptr
	if(cchValid >= 2)
		chNext = pch[1];
	else								// In case gap splits TRD
	{									//  (may happen after undo)
		CTxtPtr tp(*this);
		tp.Move(1);
		chNext = tp.GetChar();
	}

	if(chNext != CR)					// A TRD always ends with CR
		return FALSE;

	const WCHAR chHere = *pch;

	if(!ch)								// Caller accepts either delimiter
		return chHere == STARTFIELD || chHere == ENDFIELD;

	AssertSz(ch == STARTFIELD || ch == ENDFIELD,
		"CTxtPtr::IsAtTRD: illegal argument");

	return chHere == ch;
}
/*
 *	CTxtPtr::IsAfterTRD(ch)
 *
 *	@mfunc
 *		Return TRUE iff this text pointer immediately follows a table row
 *		start/end delimiter specified by ch (ch = STARTFIELD/ENDFIELD
 *		followed by CR). If ch = 0, then match both start and end delims.
 *
 *	@rdesc
 *		TRUE iff text ptr follows the requested table row delimiter
 */
BOOL CTxtPtr::IsAfterTRD(
	WCHAR ch)			//@parm Table Row Delimiter
{
	LONG		 cchValid;
	const WCHAR *pch = GetPchReverse(cchValid);

	if(cchValid < 1)					// No text before this ptr
		return FALSE;

	if(pch[-1] != CR)					// A TRD always ends with CR
		return FALSE;

	WCHAR chPrev;						// Char preceding that CR
	if(cchValid >= 2)
		chPrev = pch[-2];
	else								// In case gap splits TRD
	{									//  (may happen after undo)
		CTxtPtr tp(*this);
		tp.Move(-1);
		chPrev = tp.GetPrevChar();
	}

	if(!ch)								// Caller accepts either delimiter
		return chPrev == STARTFIELD || chPrev == ENDFIELD;

	AssertSz(ch == STARTFIELD || ch == ENDFIELD,
		"CTxtPtr::IsAfterTRD: illegal argument");

	return chPrev == ch;
}
/*
 *	CTxtPtr::IsAtStartOfCell()
 *
 *	@mfunc
 *		Return TRUE iff this text pointer immediately follows a table row
 *		start delimiter (STARTFIELD CR) or any cell delimiter (CELL) except
 *		the last one in a row, i.e., one that is followed by a table row
 *		end delimiter.
 *
 *	@rdesc
 *		TRUE iff text ptr is at the start of a cell
 */
BOOL CTxtPtr::IsAtStartOfCell()
{
	LONG		 cchValid;
	const WCHAR *pch = GetPchReverse(cchValid);

	// Case 1: follows a CELL mark and isn't at the row end delimiter
	const BOOL fAfterCell = cchValid && pch[-1] == CELL && !IsAtTRD(ENDFIELD);
	if(fAfterCell)
		return fAfterCell;

	// Case 2: follows the row start delimiter STARTFIELD CR
	return cchValid >= 2 && pch[-1] == CR && pch[-2] == STARTFIELD;
}
// Needed for CTxtPtr::ReplaceRange() and InsertRange()
#if cchGapInitial < 1
#error "cchGapInitial must be at least one"
#endif
/*
 *	CTxtPtr::MoveWhile(cchRun, chFirst, chLast, fInRange)
 *
 *	@mfunc
 *		Move this text ptr forward while the chars it passes are
 *		(fInRange ? inside : outside) the range chFirst thru chLast,
 *		examining at most cchRun chars. I.e., stop at the first char that
 *		is (fInRange ? not in range : in range), after cchRun chars, or at
 *		the end of the text, whichever comes first. E.g., chFirst = 0,
 *		chLast = 0x7F and fInRange = TRUE stops at first nonASCII char.
 *
 *	@rdesc
 *		cch left in run on return: nonzero if a stopping char was found
 *		(the text ptr is at that char), 0 if cchRun chars were bypassed
 *		or the end of the text was reached without finding one.
 */
LONG CTxtPtr::MoveWhile(
	LONG  cchRun,		//@parm Max cch to check
	WCHAR chFirst,		//@parm First ch in range
	WCHAR chLast,		//@parm Last ch in range
	BOOL  fInRange)		//@parm Nonzero: bypass chars in range; 0: bypass chars out of range
{
	LONG		 cch;
	LONG		 i;
	const WCHAR *pch;

	// Stop on a char whose in-range state equals this. Using !fInRange
	// (rather than XORing with fInRange) treats any nonzero fInRange
	// as TRUE.
	const BOOL	 fStopIfInRange = !fInRange;

	while(cchRun)
	{
		pch = GetPch(cch);
		if(!pch)							// Ran out of text before cchRun was
			break;							//  used up: quit instead of spinning

		cch = min(cch, cchRun);
		for(i = 0; i < cch; i++)
		{
			if(IN_RANGE(chFirst, *pch++, chLast) == fStopIfInRange)
			{
				Move(i);					// Advance to the stopping char
				return cchRun - i;			// Count not bypassed
			}
		}
		cchRun -= cch;
		Move(cch);							// Advance to next txt bdy
	}
	return 0;
}
/*
* CTxtPtr::FindWordBreak(action, cpMost)
*
* @mfunc
* Find a word break and move this text pointer to it. For
* WB_CLASSIFY and WB_ISDELIMITER, this text pointer isn't moved and
* the word-break procedure's result for the current char is returned.
* Odd action values search forward; the others search backward.
*
* @rdesc
* Offset from cp of the word break (or the word-break procedure's
* result for WB_CLASSIFY and WB_ISDELIMITER)
*
* @comm
* Text is handed to the word-break procedure in chunks through a local
* buffer of breakBufSize chars, one of which is a context char from
* just outside the chunk.
*/
LONG CTxtPtr::FindWordBreak(
INT action, //@parm See TxWordBreakProc header
LONG cpMost) //@parm Limiting character position
{
TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::FindWordBreak");
_TEST_INVARIANT_
const INT breakBufSize = 10; // Size of pchBreakBuf in chars
LONG bufferSize; // Count of chunk chars in buffer
LONG cch; // Count of chars left to search
LONG cchBuffer; // Count of chars left to fill
LONG cchChunk; // Count of contiguous valid chars
LONG cchText = GetTextLength();
WCHAR ch = GetChar();
WCHAR pchBreakBuf[breakBufSize];
LONG cpSave = _cp; // For calculating break pt
LONG ichBreak; // Break offset within chunk
WCHAR * pBuf;
WCHAR const * pch;
LONG t; // Temp for abs() macro
BOOL b10ModeWordBreak = (_ped->Get10Mode() && _ped->_pfnWB);
if(action == WB_CLASSIFY || action == WB_ISDELIMITER)
return ch ? _ped->TxWordBreakProc(&ch, 0, CbOfCch(1), action, GetCp()) : 0;
if(action & 1) // Searching forward
{ // Easiest to handle EOPs
if(action == WB_MOVEWORDRIGHT && IsEOP(ch)) // explicitly (spanning
{ // a class can go too
AdjustCRLF(); // far). Go to end of
AdvanceCRLF(); // EOP "word"
goto done;
}
// Calc. max search
if((DWORD)cpMost > (DWORD)cchText) // Bounds check: get < 0
cpMost = cchText; // as well as too big
cch = cpMost - _cp;
while(cch > 0)
{ // The independent buffer
cchBuffer = min(cch, breakBufSize - 1); // avoids gaps in BS
cch -= bufferSize = cchBuffer;
pBuf = pchBreakBuf; // Fill buffer forward
// Grab the first character in reverse for fnWB that require 2
// chars. Note, we play with _ich to get single char fnWB
// to ignore this character.
// NOTE(review): GetPchReverse() returns a ptr just *past* the
// previous char (cf. GetPrevChar(), which reads *(pch - 1)), yet *pch
// is stored here; see the disabled alternative below. Confirm intent.
pch = GetPchReverse(cchChunk);
if ( !cchChunk ) pch = L" "; // Any break char
*pBuf++ = *pch;
// *pBuf++ = (cchChunk ? *(pch - 1) : L' ');
while ( cchBuffer ) // Finish filling
{
pch = GetPch(cchChunk);
if (!cchChunk) { Assert(0); break; }
cchChunk = min(cchBuffer, cchChunk);
Move(cchChunk);
wcsncpy(pBuf, pch, cchChunk);
pBuf += cchChunk;
cchBuffer -= cchChunk;
}
// Buffer index 0 holds the context char, so the search starts at
// index 1 and 1 is subtracted to make ichBreak relative to the chunk
ichBreak = _ped->TxWordBreakProc(pchBreakBuf, 1, // Find the break
CbOfCch(bufferSize+1), action, GetCp()-bufferSize, GetCp()-bufferSize) - 1;
// in 1.0 mode some apps will return 0 implying the current cp position is a valid break point
if (ichBreak == -1 && b10ModeWordBreak)
ichBreak = 0;
// Apparently, some fnWBs return ambiguous results
if(ichBreak >= 0 && ichBreak <= bufferSize)
{
// Ambiguous break pt?
// Due to the imprecise nature of the word break proc spec,
// we've reached an ambiguous condition where we don't know
// if this is really a break, or just the end of the data.
// By backing up or going forward by 2, we'll know for sure.
// NOTE: we'll always be able to advance or go back by 2
// because we guarantee that when !cch that we have
// at least breakBufSize chars in the data stream.
// NOTE(review): this comment used to say "(16)", but breakBufSize
// is 10 in this function.
if (ichBreak < bufferSize || !cch)
{
Move( ichBreak - bufferSize ); // Back up from chunk end to break
break;
}
// Need to recalc break pt to disambiguate
t = Move(ichBreak - bufferSize - 2); // abs() is a
cch += abs(t); // macro
}
}
}
else // REVERSE - code dup based on EliK "streams" concept.
{
if(!_cp) // Can't go anywhere
return 0;
if(action == WB_MOVEWORDLEFT) // Easiest to handle EOPs
{ // here
if(IsASCIIEOP(ch) && AdjustCRLF()) // In middle of a CRLF or
goto done; // CRCRLF "word"
ch = PrevChar(); // Check if previous char
if(IsEOP(ch)) // is an EOP char
{
if(ch == LF) // Backspace to start of
AdjustCRLF(); // CRLF and CRCRLF
goto done;
}
Move(1); // Move back to start char
}
// Calc. max search
if((DWORD)cpMost > (DWORD)_cp) // Bounds check (also
cpMost = _cp; // handles cpMost < 0)
cch = cpMost;
while(cch > 0)
{ // The independent buffer
cchBuffer = min(cch, breakBufSize - 1); // avoids gaps in BS
cch -= bufferSize = cchBuffer;
pBuf = pchBreakBuf + cchBuffer; // Fill from the end.
// Grab the first character forward for fnWB that require 2 chars.
// Note: we play with _ich to get single char fnWB to ignore this
// character.
pch = GetPch(cchChunk);
if ( !cchChunk ) pch = L" "; // Any break char
*pBuf = *pch;
while ( cchBuffer > 0 ) // Fill rest of buffer
{ // before going in reverse
pch = GetPchReverse(cchChunk );
if (!cchChunk) { Assert(0); break; }
cchChunk = min(cchBuffer, cchChunk);
Move(-cchChunk);
pch -= cchChunk; // Back up to start of chars
pBuf -= cchChunk; // being copied
wcsncpy(pBuf, pch, cchChunk);
cchBuffer -= cchChunk;
}
// Get break left.
ichBreak = _ped->TxWordBreakProc(pchBreakBuf, bufferSize,
CbOfCch(bufferSize+1), action, GetCp(), GetCp()+bufferSize);
// in 1.0 mode some apps will return 0 implying the current cp position is a valid break point
if (ichBreak == 0 && b10ModeWordBreak)
ichBreak = bufferSize;
// Apparently, some fnWBs return ambiguous results
if(ichBreak >= 0 && ichBreak <= bufferSize)
{ // Ambiguous break pt?
// NOTE: when going in reverse, we have >= bufsize - 1
// because there is a break-after char (hyphen).
if ( ichBreak > 0 || !cch )
{
Move(ichBreak); // Move _cp to break point.
break;
}
cch += Move(2 + ichBreak); // Need to recalc break pt
} // to disambiguate.
}
}
done:
return _cp - cpSave; // Offset of where to break
}
/*
* CTxtPtr::TranslateRange(cch, CodePage, fSymbolCharSet, publdr)
*
* @mfunc
* Translate the cch chars starting at this text pointer between their
* Unicode and SYMBOL_CHARSET forms and replace the range with the
* translated text. If fSymbolCharSet is TRUE, the text is converted to
* bytes with WCTMB() using CodePage and each resulting byte is stored
* back zero-extended to a WCHAR. Otherwise each char is truncated to a
* byte and the byte string is converted with MBTWC() using CodePage.
*
* @rdesc
* Count of new characters added (should be same as count replaced).
* If the conversion routine sets its "no code page" flag, or if every
* char examined is below 128, no replacement is made and the (possibly
* shortened) cch is returned.
*
* @devnote
* Moves this text pointer to end of replaced text.
* May move text block and formatting arrays.
*/
LONG CTxtPtr::TranslateRange(
LONG cch, //@parm length of range to translate
UINT CodePage, //@parm CodePage for MBTWC or WCTMB
BOOL fSymbolCharSet, //@parm Target charset
IUndoBuilder *publdr) //@parm Undo bldr to receive antievents
{
CTempWcharBuf twcb;
CTempCharBuf tcb;
UINT ch;
BOOL fAllASCII = TRUE; // Cleared when a char >= 128 is seen
BOOL fNoCodePage; // Set by WCTMB/MBTWC
BOOL fUsedDef; // Receives whether WCTMB used the default char
LONG i;
// NOTE(review): the GetBuf() results are used without a NULL check;
// confirm whether GetBuf() can fail.
char * pastr = tcb.GetBuf(cch); // Byte (multibyte) work buffer
WCHAR * pstr = twcb.GetBuf(cch); // Unicode work buffer
WCHAR * pstrT = pstr; // Remember start of Unicode buffer
i = GetText(cch, pstr); // Copy the range to translate
Assert(i == cch);
if(fSymbolCharSet) // Target is SYMBOL_CHARSET
{
// Default char is NULL so that unconvertible chars show up as 0 bytes
WCTMB(CodePage, 0, pstr, cch, pastr, cch, "\0", &fUsedDef,
&fNoCodePage, FALSE);
if(fNoCodePage)
return cch;
for(; i && *pastr; i--) // Break if conversion failed
{ // (NULL default char used)
if(*pstr >= 128)
fAllASCII = FALSE;
*pstr++ = *(BYTE *)pastr++; // Store byte zero-extended
}
cch -= i; // Only replace chars converted
if(fAllASCII)
return cch;
}
else // Target isn't SYMBOL_CHARSET
{
while(i--)
{
ch = *pstr++; // Source is SYMBOL_CHARSET, so
*pastr++ = (char)ch; // all chars should be < 256
if(ch >= 128) // In any event, truncate to BYTE
fAllASCII = FALSE;
}
if(fAllASCII) // All ASCII, so no conversion needed
return cch;
// pastr was advanced cch bytes by the loop above; back up to its start
MBTWC(CodePage, 0, pastr - cch, cch, pstrT, cch, &fNoCodePage);
if(fNoCodePage)
return cch;
}
// Replace the original cch chars with the translated text
return ReplaceRange(cch, cch, pstrT, publdr, NULL, NULL);
}
/*
* CTxtPtr::ReplaceRange(cchOld, cchNew, *pch, publdr, paeCF, paePF)
*
* @mfunc
* Replace a range of text at this text pointer. While both old chars
* remain to be removed and new chars remain to be stored, the new text
* is written over the old text block by block. Whatever is left over
* afterwards is handled by InsertRange() (new chars remaining) or
* DeleteRange() (old chars remaining).
*
* @rdesc
* count of new characters added
*
* @comm SideEffects: <nl>
* moves this text pointer to end of replaced text <nl>
* moves text block array <nl>
*/
LONG CTxtPtr::ReplaceRange(
LONG cchOld, //@parm length of range to replace
// (<lt> 0 means to end of text)
LONG cchNew, //@parm length of replacement text
WCHAR const *pch, //@parm replacement text
IUndoBuilder *publdr, //@parm if non-NULL, where to put an
// anti-event for this action
IAntiEvent *paeCF, //@parm char format AE
IAntiEvent *paePF ) //@parm paragraph formatting AE
{
TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::ReplaceRange");
_TEST_INVARIANT_
LONG cchAdded = 0; // Running count of chars stored
LONG cchInBlock; // Old chars removed from current block
LONG cchNewInBlock; // New chars stored in current block
if(cchOld < 0) // Negative count: replace to end of text
cchOld = GetTextLength() - _cp;
// Record undo information before the text is changed
if(publdr)
HandleReplaceRangeUndo( cchOld, cchNew, publdr, paeCF, paePF);
// Blocks involving replacement
while(cchOld > 0 && cchNew > 0)
{
CTxtBlk *ptb = GetRun(0);
// cchOld should never be nonzero if the text run is empty
AssertSz(ptb,
"CTxtPtr::Replace() - Pointer to text block is NULL !");
ptb->MoveGap(_ich); // Put block's gap at _ich
// Remove as many old chars as this block holds after _ich
cchInBlock = min(cchOld, ptb->_cch - _ich);
if(cchInBlock > 0)
{
cchOld -= cchInBlock;
ptb->_cch -= cchInBlock;
((CTxtArray *)_pRuns)->_cchText -= cchInBlock;
}
// Free space now available in this block
cchNewInBlock = CchOfCb(ptb->_cbBlock) - ptb->_cch;
// if there's room for a gap, leave one
if(cchNewInBlock > cchGapInitial)
cchNewInBlock -= cchGapInitial;
if(cchNewInBlock > cchNew)
cchNewInBlock = cchNew;
if(cchNewInBlock > 0)
{
// Store new chars at _ich and advance all counts/pointers past them
CopyMemory(ptb->_pch + _ich, pch, CbOfCch(cchNewInBlock));
cchNew -= cchNewInBlock;
_cp += cchNewInBlock;
_ich += cchNewInBlock;
pch += cchNewInBlock;
cchAdded += cchNewInBlock;
ptb->_cch += cchNewInBlock;
ptb->_ibGap += CbOfCch(cchNewInBlock);
((CTxtArray *)_pRuns)->_cchText += cchNewInBlock;
}
// Stop at the last block or when no old chars remain
if(_iRun >= Count() - 1 || !cchOld )
break;
// Go to next block
_iRun++;
_ich = 0;
}
// Handle whatever couldn't be done as an in-place replacement
if(cchNew > 0)
cchAdded += InsertRange(cchNew, pch);
else if(cchOld > 0)
DeleteRange(cchOld);
return cchAdded;
}
/*
 *	CTxtPtr::HandleReplaceRangeUndo (cchOld, cchNew, publdr, paeCF, paePF)
 *
 *	@mfunc
 *		Worker function for ReplaceRange. Figures out what will happen in
 *		the replace range call and creates the appropriate anti-events.
 *
 *	@devnote
 *		We first check to see if our replace range data can be merged into
 *		the undo builder's top anti-event. If it can, the formatting
 *		anti-event lists passed in are destroyed and we just return.
 *		Otherwise, we copy the soon-to-be-deleted characters into an
 *		allocated buffer and create a ReplaceRange anti-event from it.
 *		If the buffer can't be allocated, the anti-event is created with a
 *		deleted-char count of 0.
 *
 *		In order to handle ordering problems between formatting and text
 *		anti-events (that is, text needs to exist before formatting can
 *		be applied), we have any formatting anti-events passed to us first.
 *
 *		The character buffer is allocated with new[]; if no anti-event is
 *		created to receive it, it is released here with the matching
 *		delete[].
 */
void CTxtPtr::HandleReplaceRangeUndo(
	LONG			cchOld,		//@parm Count of characters to delete
	LONG			cchNew,		//@parm Count of new characters to add
	IUndoBuilder *	publdr,		//@parm Undo builder to receive anti-event
	IAntiEvent *	paeCF,		//@parm char formatting AE
	IAntiEvent *	paePF )		//@parm paragraph formatting AE
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::HandleReplaceRangeUndo");
	_TEST_INVARIANT_

	IAntiEvent *pae = publdr->GetTopAntiEvent();
	WCHAR *		pch = NULL;

	if(pae)
	{
		// Try to fold this change into the existing top anti-event
		SimpleReplaceRange	sr;
		sr.cpMin  = _cp;
		sr.cpMax  = _cp + cchNew;
		sr.cchDel = cchOld;

		if(pae->MergeData(MD_SIMPLE_REPLACERANGE, &sr) == NOERROR)
		{
			// If the data was merged successfully, then we do
			// not need these anti-events
			if(paeCF)
				DestroyAEList(paeCF);

			if(paePF)
				DestroyAEList(paePF);

			// we've done everything we need to.
			return;
		}
	}

	// Allocate a buffer and grab the soon-to-be deleted
	// text (if necessary)
	if(cchOld > 0)
	{
		pch = new WCHAR[cchOld];
		if(pch)
			GetText(cchOld, pch);
		else
			cchOld = 0;					// No buffer: record no deleted text
	}

	// The new range will exist from our current position plus
	// cchNew (because everything in cchOld gets deleted)
	pae = gAEDispenser.CreateReplaceRangeAE(_ped, _cp, _cp + cchNew,
			cchOld, pch, paeCF, paePF);

	if(pae)
		publdr->AddAntiEvent(pae);
	else
		delete [] pch;					// Array form matches new WCHAR[]
}
/*
* CTxtPtr::InsertRange(cch, pch)
*
* @mfunc
* Insert a range of characters at this text pointer. If the text fits
* in the current block (after an optional resize), it is copied in
* directly. Otherwise the current block is split at the insertion
* point and the text is distributed over the first half of the split,
* zero or more newly added middle blocks, and the second half of the
* split, as computed by TxDivideInsertion().
*
* @rdesc
* Count of characters successfully inserted
*
* @comm Side Effects: <nl>
* moves this text pointer to end of inserted text <nl>
* moves the text block array <nl>
* flags out-of-memory on the call manager if a block can't be
* added, resized or split <nl>
*/
LONG CTxtPtr::InsertRange (
LONG cch, //@parm length of text to insert
WCHAR const *pch) //@parm text to insert
{
TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::InsertRange");
_TEST_INVARIANT_
LONG cchSave = cch; // Original request, for return value
LONG cchInBlock; // Free space / chars per block
LONG cchFirst; // Chars for first half of split
LONG cchLast = 0; // Chars for second half of split
LONG ctbNew; // Count of middle blocks to add
CTxtBlk *ptb;
// Ensure text array is allocated
if(!Count())
{
LONG cbSize = -1; // presumably -1 requests the default block size -- TODO confirm
// If we don't have any blocks, allocate first block to be big enuf
// for the inserted text *only* if it's smaller than the normal block
// size. This allows us to be used efficiently as a display engine
// for small amounts of text.
if(cch < CchOfCb(cbBlockInitial))
cbSize = CbOfCch(cch);
if(!((CTxtArray *)_pRuns)->AddBlock(0, cbSize))
{
_ped->GetCallMgr()->SetOutOfMemory();
goto done; // Nothing inserted
}
}
ptb = GetRun(0);
cchInBlock = CchOfCb(ptb->_cbBlock) - ptb->_cch; // Free space in current block
AssertSz(ptb->_cbBlock <= cbBlockMost, "block too big");
// Try resizing without splitting...
if(cch > cchInBlock &&
cch <= cchInBlock + CchOfCb(cbBlockMost - ptb->_cbBlock))
{
if( !ptb->ResizeBlock(min(cbBlockMost,
CbOfCch(ptb->_cch + cch + cchGapInitial))) )
{
_ped->GetCallMgr()->SetOutOfMemory();
goto done; // Nothing inserted
}
cchInBlock = CchOfCb(ptb->_cbBlock) - ptb->_cch;
}
if(cch <= cchInBlock)
{
// All fits into block without any hassle
ptb->MoveGap(_ich);
CopyMemory(ptb->_pch + _ich, pch, CbOfCch(cch));
_cp += cch; // *this points at end of
_ich += cch; // insertion
ptb->_cch += cch;
((CTxtArray *)_pRuns)->_cchText += cch;
ptb->_ibGap += CbOfCch(cch);
return cch;
}
// Won't all fit in this block, so figure out best division into blocks
TxDivideInsertion(cch, _ich, ptb->_cch - _ich,&cchFirst, &cchLast);
// Subtract cchLast up front so return value isn't negative
// if SplitBlock() fails
cch -= cchLast; // Don't include last block in count for middle blocks
// Split block containing insertion point
// ***** moves _prgtb ***** //
if(!((CTxtArray *)_pRuns)->SplitBlock(_iRun, _ich, cchFirst, cchLast,
_ped->IsStreaming()))
{
_ped->GetCallMgr()->SetOutOfMemory();
goto done; // cch + cchLast == cchSave here, so 0 is returned
}
ptb = GetRun(0); // Recompute ptb after (*_pRuns) moves
// Copy into first block (first half of split)
if(cchFirst > 0)
{
AssertSz(ptb->_ibGap == CbOfCch(_ich), "split first gap in wrong place");
AssertSz(cchFirst <= CchOfCb(ptb->_cbBlock) - ptb->_cch, "split first not big enough");
CopyMemory(ptb->_pch + _ich, pch, CbOfCch(cchFirst));
cch -= cchFirst;
pch += cchFirst;
_ich += cchFirst;
ptb->_cch += cchFirst;
((CTxtArray *)_pRuns)->_cchText += cchFirst;
ptb->_ibGap += CbOfCch(cchFirst);
}
// Copy into middle blocks
// FUTURE: (jonmat) I increased the size for how large a split block
// could be and this seems to increase the performance, we should test
// the block size difference on a retail build, however. 5/15/1995
ctbNew = cch / cchBlkInsertmGapI /* cchBlkInitmGapI */;
if(ctbNew <= 0 && cch > 0) // Always need at least one block
ctbNew = 1; // for leftover text
for(; ctbNew > 0; ctbNew--)
{
// Spread the remaining text evenly over the remaining new blocks
cchInBlock = cch / ctbNew;
AssertSz(cchInBlock > 0, "nothing to put into block");
// ***** moves _prgtb ***** //
if(!((CTxtArray *)_pRuns)->AddBlock(++_iRun,
CbOfCch(cchInBlock + cchGapInitial)))
{
_ped->GetCallMgr()->SetOutOfMemory();
BindToCp(_cp); //force a rebind;
// NOTE(review): _cp is still advanced at done: by the count
// already inserted, after this rebind -- confirm that the
// run pointer stays consistent with _cp on this path.
goto done;
}
// NOTE: next line intentionally advances ptb to next CTxtBlk
ptb = GetRun(0);
AssertSz(ptb->_ibGap == 0, "New block not added correctly");
CopyMemory(ptb->_pch, pch, CbOfCch(cchInBlock));
cch -= cchInBlock;
pch += cchInBlock;
_ich = cchInBlock;
ptb->_cch = cchInBlock;
((CTxtArray *)_pRuns)->_cchText += cchInBlock;
ptb->_ibGap = CbOfCch(cchInBlock);
}
AssertSz(cch == 0, "Didn't use up all text");
// copy into last block (second half of split)
if(cchLast > 0)
{
AssertSz(_iRun < Count()-1, "no last block");
ptb = Elem(++_iRun);
AssertSz(ptb->_ibGap == 0, "split last gap in wrong place");
AssertSz(cchLast <= CchOfCb(ptb->_cbBlock) - ptb->_cch,
"split last not big enuf");
CopyMemory(ptb->_pch, pch, CbOfCch(cchLast));
// don't subtract cchLast from cch; it's already been done
_ich = cchLast;
ptb->_cch += cchLast;
((CTxtArray *)_pRuns)->_cchText += cchLast;
ptb->_ibGap = CbOfCch(cchLast);
cchLast = 0; // Inserted all requested chars
}
done:
// At this point cch + cchLast is the count of chars NOT inserted
AssertSz(cch + cchLast >= 0, "we should have inserted some characters");
AssertSz(cch + cchLast <= cchSave, "don't insert more than was asked for");
cch = cchSave - cch - cchLast; // # chars successfully inserted
_cp += cch; // Multi-block path updates _cp only here
AssertSz (GetTextLength() ==
((CTxtArray *)_pRuns)->CalcTextLength(),
"CTxtPtr::InsertRange(): _pRuns->_cchText screwed up !");
return cch;
}
/*
 *	TxDivideInsertion(cch, ichBlock, cchAfter, pcchFirst, pcchLast)
 *
 *	@func
 *		Decide how many of the cch chars being inserted should go into the
 *		first and last halves of a block that is split at ichBlock. Both
 *		halves are first topped up toward cchBlkCombmGapI chars. If the
 *		remainder doesn't divide evenly into blocks of cchBlkInsertmGapI
 *		chars, as much of the odd part as fits is also given to the first
 *		half and then to the last half.
 *
 *	@rdesc
 *		nothing (results are returned in *pcchFirst and *pcchLast)
 */
static void TxDivideInsertion(
	LONG	cch,			//@parm length of text to insert
	LONG	ichBlock,		//@parm offset within block to insert text
	LONG	cchAfter,		//@parm length of text after insertion in block
	LONG *	pcchFirst,		//@parm exit: length of text to put in first block
	LONG *	pcchLast)		//@parm exit: length of text to put in last block
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "TxDivideInsertion");

	// Top up the first half of the split toward the combine size
	LONG cchHead = max(0, cchBlkCombmGapI - ichBlock);
	cchHead = min(cch, cchHead);
	cch -= cchHead;

	// Then do the same for the last half with what is left
	LONG cchTail = max(0, cchBlkCombmGapI - cchAfter);
	cchTail = min(cch, cchTail);
	cch -= cchTail;

	// Odd part that wouldn't fill a whole middle block
	LONG cchOdd = cch % cchBlkInsertmGapI;

	if(cchOdd > 0)
	{
		// Give the first half as much of the odd part as it has room for
		LONG cchRoom = max(0, cchBlkInsertmGapI - ichBlock - cchHead);
		cchRoom = min(cchRoom, cchOdd);
		cchHead += cchRoom;
		cchOdd  -= cchRoom;

		if(cchOdd > 0)
		{
			// Anything still left over goes to the last half if it fits
			cchRoom = max(0, cchBlkInsertmGapI - cchAfter - cchTail);
			cchRoom = min(cchRoom, cchOdd);
			cchTail += cchRoom;
		}
	}

	*pcchFirst = cchHead;
	*pcchLast  = cchTail;
}
/*
* CTxtPtr::DeleteRange(cch)
*
* @mfunc
* Delete cch characters starting at this text pointer. The count is
* clipped so that deletion never extends past the end of the story.
* Chars are removed from the current block first, then any blocks that
* are covered completely are removed, and finally the remaining chars
* are removed from the start of the following block.
*
* @rdesc
* nothing
*
* @comm Side Effects: <nl>
* moves text block array
*/
void CTxtPtr::DeleteRange(
LONG cch) //@parm length of text to delete
{
TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::DeleteRange");
_TEST_INVARIANT_
LONG cchInBlock; // Chars removed from first block
LONG ctbDel = 0; // Default no blocks to delete
LONG itb; // Index of block being examined
CTxtBlk * ptb = GetRun(0);
LONG cOldRuns = Count(); // Block count before deletion
AssertSz(ptb,
"CTxtPtr::DeleteRange: want to delete, but no text blocks");
if (cch > GetTextLength() - _cp) // Don't delete beyond end of story
cch = GetTextLength() - _cp;
((CTxtArray *)_pRuns)->_cchText -= cch; // Update total text length up front
// remove from first block
ptb->MoveGap(_ich);
cchInBlock = min(cch, ptb->_cch - _ich);
cch -= cchInBlock;
ptb->_cch -= cchInBlock;
#ifdef DEBUG
((CTxtArray *)_pRuns)->Invariant();
#endif // DEBUG
// Start with the current block if it is now empty, else with the next one
for(itb = ptb->_cch ? _iRun + 1 : _iRun;
cch && cch >= Elem(itb)->_cch; ctbDel++, itb++)
{
// More to go: scan for complete blocks to remove
cch -= Elem(itb)->_cch;
}
if(ctbDel)
{
// ***** moves (*_pRuns) ***** //
itb -= ctbDel; // Back up to first block to remove
((CTxtArray *)_pRuns)->RemoveBlocks(itb, ctbDel);
}
// Remove from last block
if(cch > 0)
{
ptb = Elem(itb);
AssertSz(cch < ptb->_cch, "last block too small");
ptb->MoveGap(0); // Gap at start, so reducing _cch
ptb->_cch -= cch; // drops the leading chars
#ifdef DEBUG
((CTxtArray *)_pRuns)->Invariant();
#endif // DEBUG
}
((CTxtArray *)_pRuns)->CombineBlocks(_iRun);
// Rebind if the block array shrank or the current block is gone or empty
if(cOldRuns > Count() || _iRun >= Count() || !Elem(_iRun)->_cch)
BindToCp(_cp); // Empty block: force tp rebind
AssertSz (GetTextLength() ==
((CTxtArray *)_pRuns)->CalcTextLength(),
"CTxtPtr::DeleteRange(): _pRuns->_cchText screwed up !");
}
/*
 *	CTxtPtr::FindText (cpLimit, dwFlags, pch, cch)
 *
 *	@mfunc
 *		Search for the string <p pch> of length <p cch> starting at this
 *		text pointer by delegating to CTxtFinder::FindText(). On a match,
 *		this text pointer is set to the cp just past the last matched char
 *		reported by the finder. If there is no match, this text pointer is
 *		left unchanged.
 *
 *	@rdesc
 *		cp of first char of the match as reported by the finder, or
 *		-1 if no match
 */
LONG CTxtPtr::FindText (
	LONG		 cpLimit, 	//@parm Limit of search or <lt> 0 for end of text
	DWORD		 dwFlags, 	//@parm FR_MATCHCASE	case must match <nl>
							//		FR_WHOLEWORD	match must be a whole word
	const WCHAR *pch,		//@parm Text to find
	LONG		 cch)		//@parm Length of text to find
{
	LONG		cpMatchFirst, cpMatchLast;
	CTxtFinder	finder;

	if(!finder.FindText(*this, cpLimit, dwFlags, pch, cch,
						cpMatchFirst, cpMatchLast))
	{
		return -1;						// No match: tp stays where it is
	}

	SetCp(cpMatchLast + 1);				// Just past last char of match
	return cpMatchFirst;				// cp of first char of match
}
/*
* CTxtPtr::FindOrSkipWhiteSpaces (cchMax, dwFlags, pdwResult)
*
* @mfunc
* Find a whitespace or a non-whitespace character (skip all whitespaces).
* The search runs forward for cchMax > 0 and backward for cchMax < 0,
* examining at most abs(cchMax) chars. If FWS_BOUNDTOPARA is set, IsEOP()
* is used to classify chars instead of IsWhiteSpace(). The scan stops at
* the first char for which (dwFlags & FWS_SKIP) differs from the
* classifier's result. If FWS_MOVE is set, this text pointer is moved by
* the returned count.
*
* @rdesc
* Signed number of character this ptr was moved by the operation.
* In case of moving backward, the return position was already adjusted forward
* so the caller doesn't need to.
*/
LONG CTxtPtr::FindOrSkipWhiteSpaces (
LONG cchMax, //@parm Max signed count of char to search
DWORD dwFlags, //@parm Input flags
DWORD* pdwResult) //@parm Flag set if found
{
const WCHAR* pch; // Walks current text chunk
CTxtPtr tp(*this); // tp to search text with
LONG iDir = cchMax < 0 ? -1 : 1; // Search direction
LONG cpSave = _cp; // Save _cp for returning delta
LONG cchChunk, cch = 0;
DWORD dwResult = 0; // Nonzero once target char found
BOOL (*pfnIsWhite)(unsigned) = IsWhiteSpace; // Char classifier
if (dwFlags & FWS_BOUNDTOPARA)
pfnIsWhite = IsEOP; // Only EOPs count as boundaries
if (cchMax < 0)
cchMax = -cchMax; // Make max count positive
while (cchMax > 0 && !dwResult)
{
// NOTE(review): if a non-NULL chunk with cch == 0 can be returned,
// this loop makes no progress -- confirm GetPch/GetPchReverse behavior.
pch = iDir > 0 ? tp.GetPch(cch) : tp.GetPchReverse(cch);
if (!pch)
break; // No text available
if (iDir < 0)
pch--; // Going backward, point at previous char
cch = min(cch, cchMax); // Limit scan to cchMax chars
for(cchChunk = cch; cch > 0; cch--, pch += iDir)
{
// assumes FWS_SKIP == 1 and the classifier returns 0 or 1, so that
// the XOR is nonzero exactly when the two differ -- TODO confirm
if ((dwFlags & FWS_SKIP) ^ pfnIsWhite(*pch))
{
dwResult++;
break; // Stop in front of the target char
}
}
cchChunk -= cch; // Count of chars passed over
cchMax -= cchChunk;
tp.Move(iDir * cchChunk); // advance to next chunk
}
if (pdwResult)
*pdwResult = dwResult;
cch = tp.GetCp() - cpSave; // Signed distance covered
if (dwFlags & FWS_MOVE)
Move(cch); // Auto advance if requested
return cch;
}
/*
 *	CTxtPtr::FindWhiteSpaceBound (cchMin, cpStart, cpEnd, dwFlags)
 *
 *	@mfunc
 *		Figure the smallest boundary that covers cchMin and is limited by
 *		whitespaces (including CR/LF). This is how it works.
 *
 *		Text:			xxx  xxx  xxx  xxx  xxx
 *		cp + cchMin:			  xxxxx
 *		Boundary:			 xxxxxxxxxxxxx
 *
 *		Only the FWS_BOUNDTOPARA bit of dwFlags is used. When it is set,
 *		the final "find a whitespace" step is omitted in both directions.
 *		This text pointer itself isn't moved; a copy does the searching.
 *
 *	@rdesc
 *		Length of the boundary, i.e., cpEnd - cpStart
 */
LONG CTxtPtr::FindWhiteSpaceBound (
	LONG	cchMin,				// @parm Minimum char count to be covered
	LONG&	cpStart,			// @parm Boundary start
	LONG&	cpEnd,				// @parm Boundary end
	DWORD	dwFlags)			// @parm Input flags
{
	CTxtPtr		tp(*this);
	const LONG	cchText = tp.GetTextLength();
	const LONG	cpHere	= _cp;

	Assert (cpHere + cchMin <= cchText);

	cpStart = cpEnd = cpHere;
	cpEnd += max(2, cchMin);			// Cover the minimum requirement,
	cpEnd  = min(cpEnd, cchText);		//  but stay within the text

	// Flag sets for the "find" and "skip" passes
	const DWORD dwFind	   = (dwFlags & FWS_BOUNDTOPARA) | FWS_MOVE;
	const DWORD dwSkip	   = dwFind | FWS_SKIP;
	const BOOL	fThirdPass = !(dwFlags & FWS_BOUNDTOPARA);

	// Nearest upper bound: find whitespace, skip it, then optionally
	// find the next whitespace
	tp.SetCp(cpEnd);
	cpEnd += tp.FindOrSkipWhiteSpaces(cchText - cpEnd, dwFind);
	cpEnd += tp.FindOrSkipWhiteSpaces(cchText - cpEnd, dwSkip);
	if(fThirdPass)
		cpEnd += tp.FindOrSkipWhiteSpaces(cchText - cpEnd, dwFind);

	// Nearest lower bound: same three steps going backward
	tp.SetCp(cpStart);
	cpStart += tp.FindOrSkipWhiteSpaces(-cpStart, dwFind);
	cpStart += tp.FindOrSkipWhiteSpaces(-cpStart, dwSkip);
	if(fThirdPass)
		cpStart += tp.FindOrSkipWhiteSpaces(-cpStart, dwFind);

	Assert (cpStart <= cpEnd && cpEnd - cpStart >= cchMin);

	return cpEnd - cpStart;
}
/*
* CTxtPtr::FindEOP(cchMax, pResults)
*
* @mfunc
* Find EOP mark in a range within cchMax chars from this text pointer
* and position *this after it. If no EOP is found and cchMax is not
* enough to reach the start or end of the story, leave this text ptr
* alone and return 0. If no EOP is found and cchMax is sufficient to
* reach the start or end of the story, position this text ptr at the
* beginning/end of document (BOD/EOD) for cchMax <lt>/<gt> 0,
* respectively, that is, BOD and EOD are treated as a BOP and an EOP,
* respectively.
*
* @rdesc
* Return cch this text ptr is moved. Return in *pResults whether a CELL
* or EOP was found. The low byte gives the cch of the EOP if moving
* forward (else it's just 1).
*
* @devnote
* This function assumes that this text ptr isn't in middle of a CRLF
* or CRCRLF (found only in RichEdit 1.0 compatibility mode). Changing
* the for loop could speed up ITextRange MoveUntil/While substantially.
*/
LONG CTxtPtr::FindEOP (
LONG cchMax, //@parm Max signed count of chars to search
LONG *pResults) //@parm Flags saying if EOP and CELL are found
{
TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::FindEOP");
LONG cch = 0, cchStart; // cch's for scans
unsigned ch; // Current char
LONG cpSave = _cp; // Save _cp for returning delta
LONG iDir = 1; // Default forward motion
const WCHAR*pch; // Used to walk text chunks
LONG Results = 0; // Nothing found yet
CTxtPtr tp(*this); // tp to search text with
if(cchMax < 0) // Backward search
{
iDir = -1; // Backward motion
cchMax = -cchMax; // Make max count positive
cch = tp.AdjustCRLF(); // If in middle of CRLF or
// Note: IsAfterEOP() is evaluated on *this, not on tp
if(!cch && IsAfterEOP()) // CRCRLF, or follow any EOP,
cch = tp.BackupCRLF(); // backup before EOP
// presumably cch is <= 0 here (count moved backward), so this
// reduces the remaining search count -- TODO confirm
cchMax += cch;
}
while(cchMax > 0) // Scan until get out of search
{ // range or match an EOP
pch = iDir > 0 // Point pch at contiguous text
? tp.GetPch(cch) // chunk going forward or
: tp.GetPchReverse(cch); // going backward
if(!pch) // No more text to search
break;
if(iDir < 0) // Going backward, point at
pch--; // previous char
cch = min(cch, cchMax); // Limit scan to cchMax chars
for(cchStart = cch; cch; cch--) // Scan chunk for EOP
{
ch = *pch;
pch += iDir;
// Any char in the range CELL..CR except TAB ends the scan
if(IN_RANGE(CELL, ch, CR) && ch != TAB)
{ // Note that EOP was found
if(ch == CELL)
Results |= FEOP_CELL;
Results |= FEOP_EOP;
break;
}
}
cchStart -= cch; // Get cch of chars passed by
cchMax -= cchStart; // Update cchMax
// Note: this check uses GetCp() of *this rather than of tp
AssertSz(iDir > 0 && GetCp() + cchStart <= GetTextLength() ||
iDir < 0 && GetCp() - cchStart >= 0,
"CTxtPtr::FindEOP: illegal advance");
tp.Move(iDir*cchStart); // Update tp
if(Results & FEOP_EOP) // Found an EOP
break;
} // Continue with next chunk
LONG cp = tp.GetCp();
if ((Results & FEOP_EOP) || !cp || // Found EOP or cp is at story
cp == GetTextLength()) // beginning or end
{
SetCp(cp); // Set _cp = tp._cp
if(iDir > 0) // Going forward, put ptr just
Results = (Results & ~255) | AdvanceCRLF(FALSE);// after EOP
// (going back already there)
}
if(pResults) // Report whether EOP and CELL
*pResults = Results; // were found
return _cp - cpSave; // Return cch this tp moved
}
/*
* CTxtPtr::FindBOSentence(cch)
*
* @mfunc
* Find beginning of sentence in a range within cch chars from this text
* pointer and position *this at it. The search goes forward for
* cch <gt> 0 and backward otherwise. *this is moved only if a sentence
* beginning is found or if the search ends at the beginning or end of
* the story; otherwise *this is left unchanged.
*
* @rdesc
* Signed count of chars *this moved
*
* @comm
* This routine defines a sentence as a character string that ends with
* period followed by at least one whitespace character or the EOD. This
* should be replaceable so that other kinds of sentence endings can be
* used. This routine also matches initials like "M. " as sentences.
* We could eliminate those by requiring that sentences don't end with
* a word consisting of a single capital character. Similarly, common
* abbreviations like "Mr." could be bypassed. To allow a sentence to
* end with these "words", two blanks following a period could be used
* to mean an unconditional end of sentence.
*/
LONG CTxtPtr::FindBOSentence (
LONG cch) //@parm max signed count of chars to search
{
TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::FindBOSentence");
_TEST_INVARIANT_
LONG cchWhite = 0; // No whitespace chars yet
LONG cp;
LONG cpSave = _cp; // Save value for return
BOOL fST; // TRUE if sent terminator
LONG iDir = cch > 0 ? 1 : -1; // Move() increment
CTxtPtr tp(*this); // tp to search with
if(iDir > 0) // If going forward in white
while(IsWhiteSpace(tp.GetChar()) && // space, backup to 1st non
tp.Move(-1)); // whitespace char (in case
// inside sentence ending)
while(iDir > 0 || tp.Move(-1)) // Need to back up if finding
{ // backward
// cch counts toward 0 from either sign as chars are passed
for(fST = FALSE; cch; cch -= iDir) // Find sentence terminator
{
fST = IsSentenceTerminator(tp.GetChar());
if(fST || !tp.Move(iDir))
break;
}
if(!fST) // If FALSE, we ran out of
break; // chars
// NextChar() steps tp past the terminator and then past each
// whitespace char counted here
while(IsWhiteSpace(tp.NextChar()) && cch)
{ // Bypass a span of blank
cchWhite++; // chars
cch--;
}
// Going backward, a match only counts if it lies before the start cp
if(cchWhite && (cch >= 0 || tp._cp < cpSave))// Matched new sentence
break; // break
if(cch < 0) // Searching backward
{
tp.Move(-cchWhite - 1); // Back up to terminator
cch += cchWhite + 1; // Fewer chars to search
}
cchWhite = 0; // No whitespace yet for next
} // iteration
cp = tp._cp;
if(cchWhite || !cp || cp == GetTextLength())// If sentence found or got
SetCp(cp); // start/end of story, set
// _cp to tp's
return _cp - cpSave; // Tell caller cch moved
}
/*
 *	CTxtPtr::IsAtBOSentence()
 *
 *	@mfunc
 *		Report whether *this is at the beginning of a sentence (BOS) as
 *		defined in the description of the FindBOSentence(cch) routine:
 *		either at cp 0, or at a char that is neither whitespace nor a
 *		sentence terminator and that is preceded by one or more whitespace
 *		chars which in turn follow a sentence terminator.
 *
 *	@rdesc
 *		TRUE iff this text ptr is at the beginning of a sentence
 */
BOOL CTxtPtr::IsAtBOSentence()
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::IsAtBOSentence");

	if(!_cp)								// Story start always begins
		return TRUE;						//  a sentence

	unsigned ch = GetChar();

	// A sentence can't begin with whitespace or with a terminator
	if(IsWhiteSpace(ch) || IsSentenceTerminator(ch))
		return FALSE;

	CTxtPtr tp(*this);						// Walks the preceding chars
	LONG	cchWhite = 0;					// Whitespace chars backed over

	ch = tp.PrevChar();
	while(IsWhiteSpace(ch))					// Back over span of whitespace
	{
		cchWhite++;
		ch = tp.PrevChar();
	}

	// Need at least one whitespace char preceded by a terminator
	return cchWhite && IsSentenceTerminator(ch);
}
/*
 *	CTxtPtr::IsAtBOWord()
 *
 *	@mfunc
 *		Report whether *this is at the beginning of a word, that is,
 *		_cp = 0 or the char at _cp is an EOP, or a
 *		FindWordBreak(WB_MOVEWORDRIGHT) started one char earlier
 *		ends up at _cp.
 *
 *	@rdesc
 *		TRUE iff this text ptr is at the beginning of a Word
 */
BOOL CTxtPtr::IsAtBOWord()
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::IsAtBOWord");

	if(_cp && !IsAtEOP())
	{
		// Step back one char and see whether moving a word to the
		// right lands exactly on this cp
		CTxtPtr tp(*this);

		tp.Move(-1);
		tp.FindWordBreak(WB_MOVEWORDRIGHT);
		return tp._cp == _cp;
	}

	return TRUE;							// Story beginning or EOP
}
/*
* CTxtPtr::FindExact(cchMax, pch)
*
* @mfunc
* Find exact text match for null-terminated string pch in a range
* starting at this text pointer. Position this just after matched
* string and return cp at start of string, i.e., same as FindText().
* The search for the first char of pch goes forward for cchMax <gt> 0
* and backward for cchMax <lt> 0; the rest of the string is always
* compared going forward.
*
* @rdesc
* Return cp of first char in matched string and *this pointing at cp
* just following matched string. Return -1 if no match (including
* when pch is an empty string), in which case *this isn't moved.
*
* @comm
* Much faster than FindText, but still a simple search, i.e., could
* be improved.
*
* FindText can delegate to this search for search strings in which
* each char can only match itself.
*/
LONG CTxtPtr::FindExact (
LONG cchMax, //@parm signed max # of chars to search
WCHAR * pch) //@parm ptr to null-terminated string to find exactly
{
TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::FindExact");
_TEST_INVARIANT_
LONG cch, cchStart; // cch's for chunk scans
LONG cchValid; // Chars available in current chunk
LONG cchText = GetTextLength();
LONG cpMatch; // cp of matched first char
LONG iDir = 1; // Default for forward search
const WCHAR *pc; // Walks text chunk, then pattern
CTxtPtr tp(*this); // tp to search text with
if(!*pch)
return -1; // Signal null string not found
if(cchMax < 0) // Backward search
{
iDir = -1;
cchMax = -cchMax; // Make count positive
}
while(cchMax > 0)
{
if(iDir > 0)
{
if(tp.GetCp() >= cchText) // Can't go further
break;
pc = tp.GetPch(cchValid); // Characters we can search w/o
cch = cchValid; // encountering block end/gap,
} // i.e., stay within text chunk
else
{
if(!tp.GetCp()) // Can't back up any more
break;
tp.Move(-1); // Step onto previous char
pc = tp.GetPchReverse(cchValid);
cch = cchValid + 1; // Include char tp now points at
}
cch = min(cch, cchMax);
if(!cch || !pc)
break; // No more text to search
for(cchStart = cch; // Find first char
cch && *pch != *pc; cch--) // Most execution time is spent
{ // in this loop going forward or
pc += iDir; // backward. x86 rep scasb/scasw
} // are faster
cchStart -= cch; // Count of chars passed over
cchMax -= cchStart; // Update cchMax
tp.Move( iDir*(cchStart)); // Update tp
if(cch && *pch == *pc) // Matched first char
{ // See if matches up to null
cpMatch = tp.GetCp(); // Save cp of matched first char
cch = cchMax;
// From here on pc walks the search string, not the text
for(pc = pch; // Try to match rest of string
cch && *++pc==tp.NextChar();// Note: this match goes forward
cch--) ; // for both values of iDir
if(!cch)
break; // Not enuf chars for string
if(!*pc) // Matched null-terminated string
{ // *pch. Set this tp just after
SetCp(tp.GetCp()); // matched string and return cp
return cpMatch; // at start
}
tp.SetCp(cpMatch + iDir); // Move to char just following or
} // preceding matched first char
} // Up-to-date tp: continue search
return -1; // Signal string not found
}
/*
 *	CTxtPtr::NextCharCount(&cch)
 *
 *	@mfunc
 *		Helper function that moves this text ptr one char in the direction
 *		given by the sign of cch, reduces the magnitude of cch by one, and
 *		returns the char at the new position.
 *
 *	@rdesc
 *		Char at the new _cp, or 0 if cch is 0 or this text ptr can't move
 *		(in which case cch is left unchanged)
 */
WCHAR CTxtPtr::NextCharCount (
	LONG& cch)					//@parm count to use and decrement
{
	TRACEBEGIN(TRCSUBSYSTOM, TRCSCOPEINTERN, "CTxtPtr::NextCharCount");

	if(!cch)								// Count is used up
		return 0;

	const LONG iStep = (cch > 0) ? 1 : -1;	// Direction of travel

	if(!Move(iStep))						// Couldn't move
		return 0;

	cch -= iStep;							// Bring count closer to 0
	return GetChar();						// Char at new _cp
}
/*
 *	CTxtPtr::Zombie ()
 *
 *	@mfunc
 *		Turn this object into a zombie: NULL out its _ped member, reset
 *		its _cp to 0, and call SetToNull().
 */
void CTxtPtr::Zombie ()
{
	TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::Zombie");

	_ped = NULL;					// No edit instance any more
	_cp	 = 0;						// Back to start of (no) text

	SetToNull();
}
/*
 *	CTxtIStream::CTxtIStream(tp, iDir)
 *
 *	@mfunc
 *		Build a character input stream on a copy of the text ptr <p tp>.
 *		Characters are retrieved starting from the cp of <p tp> and
 *		proceeding in the direction indicated by <p iDir>: forward if
 *		<p iDir> is DIR_FWD, else backward. The stream starts out with no
 *		buffered characters.
 */
CTxtIStream::CTxtIStream(
	const CTxtPtr &tp,
	int iDir
) : CTxtPtr(tp)
{
	// Select the fetch routine that matches the stream direction
	if(iDir == DIR_FWD)
		_pfnGetChar = &CTxtIStream::GetNextChar;
	else
		_pfnGetChar = &CTxtIStream::GetPrevChar;

	_cch = 0;						// Nothing buffered yet
	_pch = NULL;
}
/*
 *	CTxtIStream::GetNextChar()
 *
 *	@mfunc
 *		Return the next character of a forward text stream. If no
 *		characters are left in _pch, FillPchFwd() is called to get more
 *		before the character is taken from _pch.
 *
 *	@rdesc
 *		WCHAR	the next character in the character input stream
 *				0, if end of text stream
 */
WCHAR CTxtIStream::GetNextChar()
{
	if(!_cch)
	{
		FillPchFwd();				// Buffer is empty: try to refill it
		if(!_cch)
			return 0;				// Nothing more to read
	}

	_cch--;
	return *_pch++;
}
/*
 *	CTxtIStream::GetPrevChar()
 *
 *	@mfunc
 *		Return the next character of a reverse text stream. If no
 *		characters are left in _pch, FillPchRev() is called to get more.
 *		Here, _pch points to the end of a string containing _cch valid
 *		characters, so the character just before _pch is the one returned.
 *
 *	@rdesc
 *		WCHAR	the next character in the character input stream (travelling
 *				backwards along the string pointed to by _pch)
 *				0, if end of text stream
 */
WCHAR CTxtIStream::GetPrevChar()
{
	if(!_cch)
	{
		FillPchRev();				// Buffer is empty: try to refill it
		if(!_cch)
			return 0;				// Nothing more to read
	}

	--_cch;
	--_pch;							// Step back onto the char to return
	return *_pch;
}
/*
* CTxtIStream::FillPchFwd()
*
* @mfunc
* Gets the next run of characters and Moves the cp of this CTxtPtr (base
* class) just past the run.
* This ensures enough chars in _pch to facilitate the next _cch calls to
* GetNextChar().
*/
void CTxtIStream::FillPchFwd()
{
_pch = GetPch(_cch);
Move(_cch);
}
/*
* CTxtIStream::FillPchRev()
*
* @mfunc
* Gets the run of characters preceding the one previously pointed to by _pch
* and moves the cp of this CTxtPtr (base class) to the beginning of the run.
* This ensures enough chars in _pch to facilitate the next _cch calls to
* GetPrevChar().
*/
void CTxtIStream::FillPchRev()
{
_pch = GetPchReverse(_cch);
Move(-_cch);
}
/*
* CTxtFinder::FindText(tp, cpLimit, dwFlags, pchToFind, cchToFind, &cpFirst, &cpLast)
*
* @mfunc
* Find the text string <p pchToFind> of length <p cchToFind> starting at
* this text pointer. If found, <p cpFirst> and <p cpLast> are set to the
* cp's of the first and last characters in the matched string (wrt tp).
* If not found, return FALSE.
*
* @rdesc
* TRUE string matched. First char at tp.GetCp() + cchOffFirst.
* Last char at tp.GetCp() + cchOffLast.
* FALSE string not found.
*/
BOOL CTxtPtr::CTxtFinder::FindText (
    const CTxtPtr &tp,          //@parm Text ptr whose cp is where the search starts
    LONG        cpLimit,        //@parm Limit of search or <lt> 0 for end of text
    DWORD       dwFlags,        //@parm FR_MATCHCASE    case must match <nl>
                                //      FR_WHOLEWORD    match must be a whole word
    const WCHAR *pchToFind,     //@parm Text to search for
    LONG        cchToFind,      //@parm Count of chars to search for
    LONG        &cpFirst,       //@parm If string found, returns cp (wrt tp) of first char
    LONG        &cpLast)        //@parm If string found, returns cp (wrt tp) of last char
{
    // Nothing to look for. Rejecting negative counts as well keeps the
    // backward-search setup below from indexing pchToFind[cchToFind - 1]
    // outside the caller's buffer.
    if(cchToFind <= 0)
        return FALSE;

    _fSearchForward = dwFlags & FR_DOWN;

    // Calculate max number of chars we must search for pchToFind
    if(_fSearchForward)
    {
        const LONG cchText = tp.GetTextLength();

        if((DWORD)cpLimit > (DWORD)cchText)     // NB: catches cpLimit < 0 too
            cpLimit = cchText;

        _cchToSearch = cpLimit - tp.GetCp();
    }
    else
    {
        // Searching backward: an out-of-range (or negative) limit means
        // search all the way back to cp 0
        if((DWORD)cpLimit > (DWORD)tp.GetCp())  // NB: catches cpLimit < 0 too
            cpLimit = 0;

        _cchToSearch = tp.GetCp() - cpLimit;
    }

    if(cchToFind > _cchToSearch)
    {
        // Not enough chars in requested direction within which
        // to find string
        return FALSE;
    }

    const BOOL fWholeWord = dwFlags & FR_WHOLEWORD;

    _fIgnoreCase     = !(dwFlags & FR_MATCHCASE);
    _fMatchAlefhamza = dwFlags & FR_MATCHALEFHAMZA;
    _fMatchKashida   = dwFlags & FR_MATCHKASHIDA;
    _fMatchDiac      = dwFlags & FR_MATCHDIAC;

    typedef LONG (CTxtPtr::CTxtFinder::*PFNMATCHSTRING)(WCHAR const *pchToFind,
                                                        LONG cchToFind,
                                                        CTxtIStream &tistr);

    // Setup function pointer appropriate for this type of search
    CTxtEdit*      ped = tp._ped;
    PFNMATCHSTRING pfnMatchString;

#define MATCHARABICSPECIALS (FR_MATCHALEFHAMZA | FR_MATCHKASHIDA | FR_MATCHDIAC)
    // If match all Arabic special characters exactly, then use simpler
    // MatchString routine. If ignore any and BiDi text exists, use
    // MatchStringBiDi.
    pfnMatchString = (ped->IsBiDi() &&
                      (dwFlags & MATCHARABICSPECIALS) != MATCHARABICSPECIALS)
                   ? &CTxtFinder::MatchStringBiDi
                   : &CTxtFinder::MatchString;

    _iDirection = _fSearchForward ? 1 : -1;

    BOOL  fFound  = FALSE;

    // The pattern is consumed in the direction of the search, so a backward
    // search starts with the pattern's last char and walks toward its first
    WCHAR chFirst = _fSearchForward ? *pchToFind : pchToFind[cchToFind - 1];

    // Pattern chars remaining after chFirst. Only formed (and only used)
    // when the pattern has more than one char, so that a one-char backward
    // search never computes a pointer that precedes pchToFind.
    const WCHAR *pchRemaining = pchToFind;
    if(cchToFind > 1)
    {
        pchRemaining = _fSearchForward ?
            &pchToFind[1] : &pchToFind[cchToFind - 2];
    }

    LONG cchRead;
    LONG cchReadToFirst = 0;        // Total chars read up to and including chFirst
    LONG cchReadToLast;             // Total chars read through end of match

    CTxtIStream tistr(tp,
                      _fSearchForward ? CTxtIStream::DIR_FWD : CTxtIStream::DIR_REV);

    while((cchRead = FindChar(chFirst, tistr)) != -1)
    {
        cchReadToFirst += cchRead;

        if(cchToFind == 1)          // Only one char in string - we've matched it!
        {
            if (_iDirection > 0)    // Searching forward
            {
                Assert(tp.GetCp() + cchReadToFirst - 1 >= 0);
                cpLast = cpFirst = tp.GetCp() + cchReadToFirst - 1;
            }
            else                    // Searching backward
            {
                Assert(tp.GetCp() - cchReadToFirst >= 0);
                cpLast = cpFirst = tp.GetCp() - cchReadToFirst;
            }
            fFound = TRUE;
        }
        else
        {
            // Check if this first char begins a match of string. Match on
            // a copy of the stream so that, on failure, the scan for the
            // next occurrence of chFirst resumes right after this one.
            CTxtIStream tistrT(tistr);
            cchRead = (this->*pfnMatchString)(pchRemaining, cchToFind - 1, tistrT);
            if(cchRead != -1)
            {
                cchReadToLast = cchReadToFirst + cchRead;

                if (_iDirection > 0)    // Searching forward
                {
                    Assert(tp.GetCp() + cchReadToFirst - 1 >= 0);
                    Assert(tp.GetCp() + cchReadToLast - 1 >= 0);

                    cpFirst = tp.GetCp() + cchReadToFirst - 1;
                    cpLast  = tp.GetCp() + cchReadToLast - 1;
                }
                else                    // Searching backward
                {
                    Assert(tp.GetCp() - cchReadToFirst >= 0);
                    Assert(tp.GetCp() - cchReadToLast >= 0);

                    cpFirst = tp.GetCp() - cchReadToFirst;
                    cpLast  = tp.GetCp() - cchReadToLast;
                }
                fFound = TRUE;
            }
        }

        if(fFound)
        {
            Assert(cpLast < tp.GetTextLength());

            if(!fWholeWord)
                break;

            // Check if matched string is whole word. At this point a
            // backward search still has cpFirst >= cpLast, so pick the
            // lower/higher cp according to direction.
            LONG cchT;
            LONG cpBefore = (_fSearchForward ? cpFirst : cpLast) - 1;
            LONG cpAfter  = (_fSearchForward ? cpLast : cpFirst) + 1;

            // Each side of the match qualifies if it is at the edge of the
            // text, if the word-break proc's WB_CLASSIFY result for the
            // neighboring char has any WBF_CLASS bits set, or if the
            // optional breaker (_pbrk) reports a word break at that cp.
            if((cpBefore < 0 ||
                (ped->TxWordBreakProc(const_cast<LPTSTR>(CTxtPtr(tp._ped, cpBefore).GetPch(cchT)),
                                      0,
                                      sizeof(WCHAR),
                                      WB_CLASSIFY, cpBefore) & WBF_CLASS) ||
                ped->_pbrk && ped->_pbrk->CanBreakCp(BRK_WORD, cpBefore + 1))
               &&
               (cpAfter >= tp.GetTextLength() ||
                (ped->TxWordBreakProc(const_cast<LPTSTR>(CTxtPtr(tp._ped, cpAfter).GetPch(cchT)),
                                      0,
                                      sizeof(WCHAR),
                                      WB_CLASSIFY, cpAfter) & WBF_CLASS) ||
                ped->_pbrk && ped->_pbrk->CanBreakCp(BRK_WORD, cpAfter)))
            {
                break;
            }
            else
                fFound = FALSE;         // Not a whole word: keep scanning
        }
    }

    if(fFound && !_fSearchForward)
    {
        // For a backward search cpFirst/cpLast were computed in reverse
        // order (cpFirst >= cpLast); swap them so that cpFirst <= cpLast
        LONG cpTemp = cpFirst;

        cpFirst = cpLast;
        cpLast  = cpTemp;
    }
    return fFound;
}
/*
 *  CTxtPtr::CTxtFinder::CharComp(ch1, ch2)
 *
 *  @func   Compare two characters according to the case sensitivity of the
 *          current search. An exact code-unit comparison is used when the
 *          search is case sensitive or when ch1 lies in the surrogate range
 *          0xD800 - 0xDFFF; otherwise the comparison is delegated to
 *          CharCompIgnoreCase().
 *
 *  @rdesc  TRUE iff characters are considered equal
 */
inline BOOL CTxtPtr::CTxtFinder::CharComp(
    WCHAR ch1,
    WCHAR ch2) const
{
    // Case-sensitive search, or ch1 is a surrogate code unit: compare exactly
    if(!_fIgnoreCase || IN_RANGE(0xD800, ch1, 0xDFFF))
        return ch1 == ch2;

    return CharCompIgnoreCase(ch1, ch2);
}
/*
 *  CTxtPtr::CTxtFinder::CharCompIgnoreCase(ch1, ch2)
 *
 *  @func   Compare two single characters with CompareString() using the
 *          user default locale and the NORM_IGNORECASE and NORM_IGNOREWIDTH
 *          flags.
 *
 *  @rdesc  TRUE iff CompareString() returns 2 for the two characters,
 *          i.e., they are equal when case and width are ignored
 */
inline BOOL CTxtPtr::CTxtFinder::CharCompIgnoreCase(
    WCHAR ch1,
    WCHAR ch2) const
{
    const DWORD dwCmpFlags = NORM_IGNORECASE | NORM_IGNOREWIDTH;
    const int   nResult    = CompareString(LOCALE_USER_DEFAULT, dwCmpFlags,
                                           &ch1, 1, &ch2, 1);

    return nResult == 2;            // 2 is the "strings are equal" result
}
/*
 *  CTxtPtr::CTxtFinder::FindChar(ch, tistr)
 *
 *  @mfunc
 *      Read characters from <p tistr> one at a time, decrementing
 *      _cchToSearch for each one, until a character matching ch is read or
 *      _cchToSearch reaches 0. A character matches if CharComp() accepts
 *      it or, when exact Alef matching is not requested, if both it and ch
 *      are Alefs.
 *
 *  @rdesc
 *      -1, if char not found
 *      n, if char found.  n indicates number of chars read from <p tistr>
 *              by this call to find the char
 */
LONG CTxtPtr::CTxtFinder::FindChar(
    WCHAR        ch,
    CTxtIStream &tistr)
{
    const LONG cchAtEntry = _cchToSearch;

    for(; _cchToSearch; )
    {
        _cchToSearch--;                     // Account for char about to be read

        const WCHAR chText = tistr.GetChar();

        BOOL fMatch = CharComp(ch, chText);
        if(!fMatch)
        {
            // Treat all Alef forms alike unless caller wants exact Alefs
            fMatch = !_fMatchAlefhamza && IsAlef(ch) && IsAlef(chText);
        }

        if(fMatch)
            return cchAtEntry - _cchToSearch;
    }
    return -1;
}
/*
 *  CTxtPtr::CTxtFinder::MatchString(pchToFind, cchToFind, tistr)
 *
 *  @mfunc
 *      Compare the next cchToFind characters returned from <p tistr>
 *      against the pattern at pchToFind using CharComp(). The pattern
 *      pointer is stepped by _iDirection after each character, so the
 *      pattern is walked in the direction of the search.
 *
 *  @rdesc
 *      -1, if string not found (or cchToFind exceeds _cchToSearch)
 *      n, if string found.  n indicates number of chars read from <p tistr>
 *              to find string (always cchToFind)
 */
LONG CTxtPtr::CTxtFinder::MatchString(
    const WCHAR *pchToFind,
    LONG         cchToFind,
    CTxtIStream &tistr)
{
    // Not enough chars left within the search limit to hold the pattern
    if((DWORD)cchToFind > (DWORD)_cchToSearch)
        return -1;

    const WCHAR *pchPattern = pchToFind;

    for(LONG cchLeft = cchToFind; cchLeft; cchLeft--, pchPattern += _iDirection)
    {
        const WCHAR chText = tistr.GetChar();

        if(!CharComp(*pchPattern, chText))
            return -1;
    }
    return cchToFind;
}
/*
 *  CTxtPtr::CTxtFinder::MatchStringBiDi(pchToFind, cchToFind, tistr)
 *
 *  @mfunc
 *      This method compares the characters returned from <p tistr> against those
 *      found in pchToFind. If the string is found, the return value indicates
 *      how many characters were read from <p tistr> to match the string.
 *      If the string is not found, -1 is returned.
 *      Kashida, diacritics and Alefs are matched/not matched according
 *      to the type of search requested: text Kashidas and diacritics that
 *      don't match the current pattern char are skipped when the
 *      corresponding _fMatchXxx flag is clear, and any Alef in the text
 *      matches any Alef in the pattern when _fMatchAlefhamza is clear.
 *
 *      Every char read from <p tistr>, including skipped ones, is counted
 *      against _cchToSearch so that a match never extends beyond the
 *      search limit.
 *
 *  @rdesc
 *      -1, if string not found
 *      n, if string found.  n indicates number of chars read from <p tistr>
 *              to find string (may exceed cchToFind if chars were skipped)
 */
LONG CTxtPtr::CTxtFinder::MatchStringBiDi(
    const WCHAR *pchToFind,
    LONG         cchToFind,
    CTxtIStream &tistr)
{
    // Pattern can't fit in the chars remaining within the search limit
    if((DWORD)_cchToSearch < (DWORD)cchToFind)
        return -1;

    LONG cchRead = 0;

    while(cchToFind)
    {
        // The stream itself knows nothing about the search limit, and
        // skipped Kashidas/diacritics consume text without consuming
        // pattern, so enforce the limit on every read
        if(cchRead >= _cchToSearch)
            return -1;

        WCHAR chComp = tistr.GetChar();
        cchRead++;

        if(!CharComp(*pchToFind, chComp))
        {
            if (!_fMatchKashida && chComp == KASHIDA ||
                !_fMatchDiac && IsBiDiDiacritic(chComp))
            {
                // Ignorable text char: read next one against same pattern char
                continue;
            }
            if (!_fMatchAlefhamza &&
                IsAlef(*pchToFind) && IsAlef(chComp))
            {
                // Alef variants are equivalent: fall through and
                // advance past *pchToFind
            }
            else
                return -1;
        }
        pchToFind += _iDirection;
        cchToFind--;
    }
    return cchRead;
}