/*++

Copyright (c) 1990 Microsoft Corporation

Module Name:

    cachesub.c

Abstract:

    This module implements the common subroutines for the Cache subsystem.

Author:

    Tom Miller [TomM] 4-May-1990

Revision History:

--*/

#include "cc.h"

//
// The Bug check file id for this module
//

#define BugCheckFileId (CACHE_BUG_CHECK_CACHESUB)

//
// Define our debug constant
//

#define me 0x00000002

//
// Define those errors which should be retried
//

#define RetryError(STS) (((STS) == STATUS_VERIFY_REQUIRED) || ((STS) == STATUS_FILE_LOCK_CONFLICT))

ULONG CcMaxDirtyWrite = 0x10000;

//
// Local support routines
//

BOOLEAN
CcFindBcb (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PLARGE_INTEGER FileOffset,
    IN OUT PLARGE_INTEGER BeyondLastByte,
    OUT PBCB *Bcb
    );

PBCB
CcAllocateInitializeBcb (
    IN OUT PSHARED_CACHE_MAP SharedCacheMap OPTIONAL,
    IN OUT PBCB AfterBcb,
    IN PLARGE_INTEGER FileOffset,
    IN PLARGE_INTEGER Length
    );

NTSTATUS
CcSetValidData (
    IN PFILE_OBJECT FileObject,
    IN PLARGE_INTEGER ValidDataLength
    );

BOOLEAN
CcAcquireByteRangeForWrite (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PLARGE_INTEGER TargetOffset OPTIONAL,
    IN ULONG TargetLength,
    OUT PLARGE_INTEGER FileOffset,
    OUT PULONG Length,
    OUT PBCB *FirstBcb
    );

VOID
CcReleaseByteRangeFromWrite (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PLARGE_INTEGER FileOffset,
    IN ULONG Length,
    IN PBCB FirstBcb,
    IN BOOLEAN VerifyRequired
    );

PBITMAP_RANGE
CcFindBitmapRangeToDirty (
    IN PMBCB Mbcb,
    IN LONGLONG Page,
    IN PULONG *FreePageForSetting
    );

PBITMAP_RANGE
CcFindBitmapRangeToClean (
    IN PMBCB Mbcb,
    IN LONGLONG Page
    );

BOOLEAN
CcLogError(
    IN PFILE_OBJECT FileObject,
    IN PUNICODE_STRING FileName,
    IN NTSTATUS Error,
    IN NTSTATUS DeviceError,
    IN UCHAR IrpMajorCode
    );

//
// Internal support routine
//

BOOLEAN
CcPinFileData (
    IN PFILE_OBJECT FileObject,
    IN PLARGE_INTEGER FileOffset,
    IN ULONG Length,
    IN BOOLEAN ReadOnly,
    IN BOOLEAN WriteOnly,
    IN ULONG Flags,
    OUT PBCB *Bcb,
    OUT PVOID *BaseAddress,
    OUT PLARGE_INTEGER BeyondLastByte
    )

/*++

Routine Description:

    This routine locks the specified range of file data into memory.

    Note that the data desired by the caller (or the first part of it)
    may be in one of three states:

        No Bcb exists which describes the data

        A Bcb exists describing the data, but it is not mapped
        (BcbOut->BaseAddress == NULL)

        A Bcb exists describing the data, and it is mapped

    Given the above three states, and given that the caller may call
    with either Wait == FALSE or Wait == TRUE, this routine has basically
    six cases.  What has to be done, and the order in which things must be
    done, varies quite a bit with each of these six cases.  The most
    straight-forward implementation of this routine, with the least amount
    of branching, is achieved by determining which of the six cases applies,
    and dispatching fairly directly to that case.  The handling of the
    cases is summarized in the following table:

                      Wait == TRUE                  Wait == FALSE
                      ------------                  -------------

    no Bcb            Case 1:                       Case 2:

                      CcAllocateInitializeBcb       CcMapAndRead (exit if FALSE)
                      Acquire Bcb Exclusive         CcAllocateInitializeBcb
                      Release BcbList SpinLock      Acquire Bcb Shared if not ReadOnly
                      CcMapAndRead w/ Wait          Release BcbList SpinLock
                                                    Convert/Release Bcb Resource

    Bcb not           Case 3:                       Case 4:
    mapped
                      Increment PinCount            Acquire Bcb Exclusive (exit if FALSE)
                      Release BcbList SpinLock      CcMapAndRead (exit if FALSE)
                      Acquire Bcb Excl. w/ Wait     Increment PinCount
                      if still not mapped           Convert/Release Bcb Resource
                          CcMapAndRead w/ Wait      Release BcbList SpinLock
                      Convert/Release Bcb Resource

    Bcb mapped        Case 5:                       Case 6:

                      Increment PinCount            if not ReadOnly
                      Release BcbList SpinLock          Acquire Bcb shared (exit if FALSE)
                      if not ReadOnly               Increment PinCount
                          Acquire Bcb Shared        Release BcbList SpinLock

    It is important to note that most changes to this routine will affect
    multiple cases from above.

Arguments:

    FileObject - Pointer to File Object for file

    FileOffset - Offset in file at which map should begin

    Length - Length of desired map in bytes

    ReadOnly - Supplies TRUE if caller will only read the mapped data (i.e.,
               TRUE for CcCopyRead, CcMapData and CcMdlRead and FALSE for
               everyone else)

    WriteOnly - The specified range of bytes will only be written.

    Flags - (PIN_WAIT, PIN_EXCLUSIVE, PIN_NO_READ, etc. as defined in cache.h)

    Bcb - Returns a pointer to the Bcb representing the pinned data.

    BaseAddress - Returns base address of desired data

    BeyondLastByte - Returns the File Offset of the first byte beyond the
                     last accessible byte.

Return Value:

    FALSE - if PIN_WAIT was not set, and it was impossible to lock all
            of the data without blocking

    TRUE - if the desired data is being returned

Raises:

    STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs.
        This can only occur if Wait was specified as TRUE.  (If Wait is
        specified as FALSE, and an allocation failure occurs, this
        routine simply returns FALSE.)

--*/

{
    PSHARED_CACHE_MAP SharedCacheMap;
    LARGE_INTEGER TrialBound;
    KLOCK_QUEUE_HANDLE LockHandle;
    PBCB BcbOut = NULL;
    ULONG ZeroFlags = 0;
    LOGICAL SpinLockAcquired = FALSE;
    BOOLEAN Result = FALSE;

    ULONG ReceivedLength;
    ULONG ActivePage;
    ULONG PageIsDirty;
    PVACB Vacb = NULL;

    DebugTrace(+1, me, "CcPinFileData:\n", 0 );
    DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
    DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
                FileOffset->HighPart );
    DebugTrace( 0, me, " Length = %08lx\n", Length );
    DebugTrace( 0, me, " Flags = %02lx\n", Flags );

    //
    // Get pointer to SharedCacheMap via File Object.
    //

    SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer
                                            + sizeof(PVOID));
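
    //
    // Note: SharedCacheMap is the second pointer in the SECTION_OBJECT_POINTERS
    // structure (immediately after DataSectionObject), which is why the code
    // above reaches it by adding sizeof(PVOID) to SectionObjectPointer rather
    // than naming the field directly.
    //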

    //
    // See if we have an active Vacb that we need to free.
    //

    GetActiveVacb( SharedCacheMap, OldIrql, Vacb, ActivePage, PageIsDirty );

    //
    // If there is an end of a page to be zeroed, then free that page now,
    // so it does not cause our data to get zeroed. If there is an active
    // page, free it so we have the correct ValidDataGoal.
    //

    if ((Vacb != NULL) || (SharedCacheMap->NeedToZero != NULL)) {

        CcFreeActiveVacb( SharedCacheMap, Vacb, ActivePage, PageIsDirty );
        Vacb = NULL;
    }

    //
    // Make sure the calling file system is not asking to map beyond the
    // end of the section, for example, that it did not forget to do
    // CcExtendCacheSection.
    //

    ASSERT( ( FileOffset->QuadPart + (LONGLONG)Length ) <=
            SharedCacheMap->SectionSize.QuadPart );

    //
    // Initially clear output
    //

    *Bcb = NULL;
    *BaseAddress = NULL;

    if (!FlagOn(Flags, PIN_NO_READ)) {

        *BaseAddress = CcGetVirtualAddress( SharedCacheMap,
                                            *FileOffset,
                                            &Vacb,
                                            &ReceivedLength );

    } else {

        //
        // In the PIN_NO_READ case, we simply need to make sure that the
        // sparse structure containing the Bcb listheads is expanded in the
        // region of the file we are interested in.
        //
        // Fake a ReceivedLength that matches the remaining bytes in the view.
        //

        ReceivedLength = VACB_MAPPING_GRANULARITY -
                         (ULONG)(FileOffset->QuadPart & (VACB_MAPPING_GRANULARITY - 1));
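
        //
        // For example, with the typical 256KB view size (VACB_MAPPING_GRANULARITY
        // == 0x40000), a FileOffset of 0x41000 has 0x1000 low bits, so the faked
        // ReceivedLength becomes 0x40000 - 0x1000 = 0x3F000 -- the bytes remaining
        // in that view.
        //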

        //
        // Now simply cause a reference that will expand a multilevel Vacb.
        //

        CcReferenceFileOffset( SharedCacheMap, *FileOffset );
    }

    //
    // Acquire Bcb List Exclusive to look for Bcb
    //

    KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
    SpinLockAcquired = TRUE;

    //
    // Use try to guarantee cleanup on the way out.
    //

    try {

        LOGICAL Found;
        LARGE_INTEGER FOffset;
        LARGE_INTEGER TLength;

        //
        // Search for Bcb describing the largest matching "prefix" byte range,
        // or where to insert it.
        //

        TrialBound.QuadPart = FileOffset->QuadPart + (LONGLONG)Length;
        Found = CcFindBcb( SharedCacheMap, FileOffset, &TrialBound, &BcbOut );

        //
        // Cases 1 and 2 - Bcb was not found.
        //
        // First calculate data to pin down.
        //

        if (!Found) {

            //
            // Get out if the user specified PIN_IF_BCB.
            //

            if (FlagOn(Flags, PIN_IF_BCB)) {

                //
                // We need to zap BcbOut since this is a hint to the cleanup code
                // to remove the Bcb if we are returning FALSE.
                //

                BcbOut = NULL;
                try_return( Result = FALSE );
            }

            //
            // Not found, calculate data to pin down.
            //
            // Round local copy of FileOffset down to page boundary, and
            // round copies of size and minimum size up. Also make sure that
            // we keep the length from crossing the end of the SharedCacheMap.
            //

            FOffset = *FileOffset;
            TLength.QuadPart = TrialBound.QuadPart - FOffset.QuadPart;

            TLength.LowPart += FOffset.LowPart & (PAGE_SIZE - 1);
            ReceivedLength += FOffset.LowPart & (PAGE_SIZE - 1);

            //
            // At this point we can calculate the ReadOnly flag for
            // the purposes of whether to use the Bcb resource, and
            // we can calculate the ZeroFlags.
            //

            if ((!ReadOnly && !FlagOn(SharedCacheMap->Flags, PIN_ACCESS)) || WriteOnly) {

                //
                // We can always zero middle pages, if any.
                //

                ZeroFlags = ZERO_MIDDLE_PAGES;

                if (((FOffset.LowPart & (PAGE_SIZE - 1)) == 0) &&
                    (Length >= PAGE_SIZE)) {
                    ZeroFlags |= ZERO_FIRST_PAGE;
                }

                if ((TLength.LowPart & (PAGE_SIZE - 1)) == 0) {
                    ZeroFlags |= ZERO_LAST_PAGE;
                }
            }

            //
            // We treat Bcbs as ReadOnly (do not acquire resource) if they
            // are in sections for which we have not disabled modified writing.
            //

            if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
                ReadOnly = TRUE;
            }

            TLength.LowPart = (ULONG) ROUND_TO_PAGES( TLength.LowPart );

            //
            // Round BaseAddress and FOffset down to the bottom of a page.
            //

            *BaseAddress = ((PCHAR)*BaseAddress - (FileOffset->LowPart & (PAGE_SIZE - 1)));
            FOffset.LowPart &= ~(PAGE_SIZE - 1);

            //
            // Even if we are readonly, we can still zero pages entirely
            // beyond valid data length.
            //

            if (FOffset.QuadPart >= SharedCacheMap->ValidDataGoal.QuadPart) {

                ZeroFlags |= ZERO_FIRST_PAGE | ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;

            } else if ((FOffset.QuadPart + (LONGLONG)PAGE_SIZE) >=
                       SharedCacheMap->ValidDataGoal.QuadPart) {

                ZeroFlags |= ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;
            }

            //
            // We will get into trouble if we try to read more than we
            // can map by one Vacb. So make sure that our lengths stay
            // within a Vacb.
            //

            if (TLength.LowPart > ReceivedLength) {
                TLength.LowPart = ReceivedLength;
            }

            //
            // Case 1 - Bcb was not found and Wait is TRUE.
            //
            // Note that it is important to minimize the time that the Bcb
            // List spin lock is held, as well as guarantee we do not take
            // any faults while holding this lock.
            //
            // If we can (and perhaps will) wait, then it is important to
            // allocate the Bcb, acquire it exclusive and free the Bcb List.
            // We then proceed to read in the data, and anyone else finding
            // our Bcb will have to wait shared to insure that the data is
            // in.
            //

            if (FlagOn(Flags, PIN_WAIT)) {

                BcbOut = CcAllocateInitializeBcb( SharedCacheMap,
                                                  BcbOut,
                                                  &FOffset,
                                                  &TLength );

                if (BcbOut == NULL) {
                    DebugTrace( 0, 0, "Bcb allocation failure\n", 0 );
                    KeReleaseInStackQueuedSpinLock( &LockHandle );
                    SpinLockAcquired = FALSE;
                    ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
                }

                //
                // Now just acquire the newly-allocated Bcb shared, and
                // release the spin lock.
                //

                if (!ReadOnly) {
                    if (FlagOn(Flags, PIN_EXCLUSIVE)) {
                        (VOID)ExAcquireResourceExclusiveLite( &BcbOut->Resource, TRUE );
                    } else {
                        (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
                    }
                }
                KeReleaseInStackQueuedSpinLock( &LockHandle );
                SpinLockAcquired = FALSE;

                //
                // Now read in the data.
                //

                if (!FlagOn(Flags, PIN_NO_READ)) {

                    (VOID)CcMapAndRead( SharedCacheMap,
                                        &FOffset,
                                        TLength.LowPart,
                                        ZeroFlags,
                                        TRUE,
                                        *BaseAddress );

                    //
                    // Now we have to reacquire the Bcb List spinlock to load
                    // up the mapping if we are the first one, else we collided
                    // with someone else who loaded the mapping first, and we
                    // will just free our mapping. It is guaranteed that the
                    // data will be mapped to the same place.
                    //

                    KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );

                    if (BcbOut->BaseAddress == NULL) {

                        BcbOut->BaseAddress = *BaseAddress;
                        BcbOut->Vacb = Vacb;
                        Vacb = NULL;
                    }

                    KeReleaseInStackQueuedSpinLock( &LockHandle );

                    //
                    // Calculate Base Address of the data we want.
                    //

                    *BaseAddress = (PCHAR)BcbOut->BaseAddress +
                                   (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
                }

                //
                // Success!
                //

                try_return( Result = TRUE );
            }

            //
            // Case 2 - Bcb was not found and Wait is FALSE
            //
            // If we cannot wait, then we immediately go see if the data is
            // there (CcMapAndRead), and then only set up the Bcb and release
            // the spin lock if the data is there. Note here we call
            // CcMapAndRead while holding the spin lock, because we know we
            // will not fault and not block before returning.
            //

            else {

                //
                // Now try to allocate and initialize the Bcb. If we
                // fail to allocate one, then return FALSE, since we know that
                // Wait = FALSE. The caller may get lucky if he calls
                // us back with Wait = TRUE.
                //

                BcbOut = CcAllocateInitializeBcb( SharedCacheMap,
                                                  BcbOut,
                                                  &FOffset,
                                                  &TLength );

                if (BcbOut == NULL) {

                    try_return( Result = FALSE );
                }

                //
                // If we are not ReadOnly, we must acquire the newly-allocated
                // resource shared, and then we can free the spin lock.
                //

                if (!ReadOnly) {
                    ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
                }
                KeReleaseInStackQueuedSpinLock( &LockHandle );
                SpinLockAcquired = FALSE;

                //
                // Note that since this call has Wait = FALSE, it cannot
                // get an exception (see procedure header).
                //

                ASSERT( !FlagOn(Flags, PIN_NO_READ) );
                if (!CcMapAndRead( SharedCacheMap,
                                   &FOffset,
                                   TLength.LowPart,
                                   ZeroFlags,
                                   FALSE,
                                   *BaseAddress )) {

                    try_return( Result = FALSE );
                }

                //
                // Now we have to reacquire the Bcb List spinlock to load
                // up the mapping if we are the first one, else we collided
                // with someone else who loaded the mapping first, and we
                // will just free our mapping. It is guaranteed that the
                // data will be mapped to the same place.
                //

                KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );

                if (BcbOut->BaseAddress == NULL) {

                    BcbOut->BaseAddress = *BaseAddress;
                    BcbOut->Vacb = Vacb;
                    Vacb = NULL;
                }

                KeReleaseInStackQueuedSpinLock( &LockHandle );

                //
                // Calculate Base Address of the data we want.
                //

                *BaseAddress = (PCHAR)BcbOut->BaseAddress +
                               (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );

                //
                // Success!
                //

                try_return( Result = TRUE );
            }

        } else {

            //
            // We treat Bcbs as ReadOnly (do not acquire resource) if they
            // are in sections for which we have not disabled modified writing.
            //

            if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
                ReadOnly = TRUE;
            }
        }

        //
        // Cases 3 and 4 - Bcb is there but not mapped
        //

        if (BcbOut->BaseAddress == NULL) {

            //
            // It is too complicated to attempt to calculate any ZeroFlags in this
            // case, because we have to not only do the tests above, but also
            // compare to the byte range in the Bcb since we will be passing
            // those parameters to CcMapAndRead. Also, the probability of hitting
            // some window where zeroing is of any advantage is quite small.
            //

            //
            // Set up to just reread the Bcb exactly as the data in it is
            // described.
            //

            *BaseAddress = ((PCHAR)*BaseAddress - (FileOffset->LowPart - BcbOut->FileOffset.LowPart));
            FOffset = BcbOut->FileOffset;
            TLength.QuadPart = (LONGLONG)BcbOut->ByteLength;

            //
            // Case 3 - Bcb is there but not mapped and Wait is TRUE
            //
            // Increment the PinCount, and then release the BcbList
            // SpinLock so that we can wait to acquire the Bcb exclusive.
            // Once we have the Bcb exclusive, map and read it in if no
            // one beats us to it. Someone may have beat us to it since
            // we had to release the SpinLock above.
            //

            if (FlagOn(Flags, PIN_WAIT)) {

                BcbOut->PinCount += 1;

                //
                // Now we have to release the BcbList SpinLock in order to
                // acquire the Bcb shared.
                //

                KeReleaseInStackQueuedSpinLock( &LockHandle );
                SpinLockAcquired = FALSE;
                if (!ReadOnly) {
                    if (FlagOn(Flags, PIN_EXCLUSIVE)) {
                        (VOID)ExAcquireResourceExclusiveLite( &BcbOut->Resource, TRUE );
                    } else {
                        (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
                    }
                }

                //
                // Now proceed to map and read the data in.
                //
                // Now read in the data.
                //

                if (!FlagOn(Flags, PIN_NO_READ)) {

                    (VOID)CcMapAndRead( SharedCacheMap,
                                        &FOffset,
                                        TLength.LowPart,
                                        ZeroFlags,
                                        TRUE,
                                        *BaseAddress );

                    //
                    // Now we have to reacquire the Bcb List spinlock to load
                    // up the mapping if we are the first one, else we collided
                    // with someone else who loaded the mapping first, and we
                    // will just free our mapping. It is guaranteed that the
                    // data will be mapped to the same place.
                    //

                    KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );

                    if (BcbOut->BaseAddress == NULL) {

                        BcbOut->BaseAddress = *BaseAddress;
                        BcbOut->Vacb = Vacb;
                        Vacb = NULL;
                    }

                    KeReleaseInStackQueuedSpinLock( &LockHandle );

                    //
                    // Calculate Base Address of the data we want.
                    //

                    *BaseAddress = (PCHAR)BcbOut->BaseAddress +
                                   (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
                }

                //
                // Success!
                //

                try_return( Result = TRUE );
            }

            //
            // Case 4 - Bcb is there but not mapped, and Wait is FALSE
            //
            // Since we cannot wait, we immediately go see if the data is
            // there (CcMapAndRead), and then only set up the Bcb and release
            // the spin lock if the data is there. Note here we call
            // CcMapAndRead while holding the spin lock, because we know we
            // will not fault and not block before returning.
            //

            else {

                if (!ReadOnly && !ExAcquireSharedStarveExclusive( &BcbOut->Resource, FALSE )) {

                    //
                    // If we cannot get the resource and have not incremented PinCount, then
                    // suppress the unpin on cleanup.
                    //

                    BcbOut = NULL;
                    try_return( Result = FALSE );
                }

                BcbOut->PinCount += 1;

                KeReleaseInStackQueuedSpinLock( &LockHandle );
                SpinLockAcquired = FALSE;

                //
                // Note that since this call has Wait = FALSE, it cannot
                // get an exception (see procedure header).
                //

                ASSERT( !FlagOn(Flags, PIN_NO_READ) );
                if (!CcMapAndRead( SharedCacheMap,
                                   &BcbOut->FileOffset,
                                   BcbOut->ByteLength,
                                   ZeroFlags,
                                   FALSE,
                                   *BaseAddress )) {

                    try_return( Result = FALSE );
                }

                //
                // Now we have to reacquire the Bcb List spinlock to load
                // up the mapping if we are the first one, else we collided
                // with someone else who loaded the mapping first, and we
                // will just free our mapping. It is guaranteed that the
                // data will be mapped to the same place.
                //

                KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );

                if (BcbOut->BaseAddress == NULL) {

                    BcbOut->BaseAddress = *BaseAddress;
                    BcbOut->Vacb = Vacb;
                    Vacb = NULL;
                }

                KeReleaseInStackQueuedSpinLock( &LockHandle );

                //
                // Calculate Base Address of the data we want.
                //

                *BaseAddress = (PCHAR)BcbOut->BaseAddress +
                               (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );

                //
                // Success!
                //

                try_return( Result = TRUE );
            }
        }

        //
        // Cases 5 and 6 - Bcb is there and it is mapped
        //

        else {

            //
            // Case 5 - Bcb is there and mapped, and Wait is TRUE
            //
            // We can just increment the PinCount, release the SpinLock
            // and then acquire the Bcb Shared if we are not ReadOnly.
            //

            if (FlagOn(Flags, PIN_WAIT)) {

                BcbOut->PinCount += 1;
                KeReleaseInStackQueuedSpinLock( &LockHandle );
                SpinLockAcquired = FALSE;

                //
                // Acquire Bcb Resource shared to insure that it is in memory.
                //

                if (!ReadOnly) {
                    if (FlagOn(Flags, PIN_EXCLUSIVE)) {
                        (VOID)ExAcquireResourceExclusiveLite( &BcbOut->Resource, TRUE );
                    } else {
                        (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
                    }
                }
            }

            //
            // Case 6 - Bcb is there and mapped, and Wait is FALSE
            //
            // If we are not ReadOnly, we have to first see if we can
            // acquire the Bcb shared before incrementing the PinCount,
            // since we will have to return FALSE if we cannot acquire the
            // resource.
            //

            else {

                //
                // Acquire Bcb Resource shared to insure that it is in memory.
                //

                if (!ReadOnly && !ExAcquireSharedStarveExclusive( &BcbOut->Resource, FALSE )) {

                    //
                    // If we cannot get the resource and have not incremented PinCount, then
                    // suppress the unpin on cleanup.
                    //

                    BcbOut = NULL;
                    try_return( Result = FALSE );
                }

                BcbOut->PinCount += 1;
                KeReleaseInStackQueuedSpinLock( &LockHandle );
                SpinLockAcquired = FALSE;
            }

            //
            // Calculate Base Address of the data we want.
            //

            *BaseAddress = (PCHAR)BcbOut->BaseAddress +
                           (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );

            //
            // Success!
            //

            try_return( Result = TRUE );
        }

    try_exit: NOTHING;

        if (FlagOn(Flags, PIN_NO_READ) &&
            FlagOn(Flags, PIN_EXCLUSIVE) &&
            (BcbOut != NULL) &&
            (BcbOut->BaseAddress != NULL)) {

            //
            // Unmap the Vacb and free the resource if the Bcb is still
            // dirty. We have to free the resource before dropping the
            // spinlock, and we want to hold the resource until the
            // virtual address is freed.
            //

            CcFreeVirtualAddress( BcbOut->Vacb );

            BcbOut->BaseAddress = NULL;
            BcbOut->Vacb = NULL;
        }

    } finally {

        //
        // Release the spinlock if it is acquired.
        //

        if (SpinLockAcquired) {
            KeReleaseInStackQueuedSpinLock( &LockHandle );
        }

        //
        // If the Vacb was not used for any reason (error or not needed), then free it here.
        //

        if (Vacb != NULL) {
            CcFreeVirtualAddress( Vacb );
        }

        //
        // If we referenced a piece of a multilevel structure, release here.
        //

        if (FlagOn(Flags, PIN_NO_READ)) {

            CcDereferenceFileOffset( SharedCacheMap, *FileOffset );
        }

        if (Result) {

            *Bcb = BcbOut;
            *BeyondLastByte = BcbOut->BeyondLastByte;

        //
        // An abnormal termination can occur on an allocation failure,
        // or on a failure to map and read the buffer.
        //

        } else {

            *BaseAddress = NULL;
            if (BcbOut != NULL) {
                CcUnpinFileData( BcbOut, ReadOnly, UNPIN );
            }
        }

        DebugTrace( 0, me, " <Bcb = %08lx\n", *Bcb );
        DebugTrace( 0, me, " <BaseAddress = %08lx\n", *BaseAddress );
        DebugTrace(-1, me, "CcPinFileData -> %02lx\n", Result );
    }

    return Result;
}

//
// Internal Support Routine
//

VOID
FASTCALL
CcUnpinFileData (
    IN OUT PBCB Bcb,
    IN BOOLEAN ReadOnly,
    IN UNMAP_ACTIONS UnmapAction
    )

/*++

Routine Description:

    This routine unmaps and unlocks the specified buffer, which was previously
    locked and mapped by calling CcPinFileData.

Arguments:

    Bcb - Pointer previously returned from CcPinFileData. As may be
          seen above, this pointer may be either a Bcb or a Vacb.

    ReadOnly - must specify same value as when data was mapped

    UnmapAction - UNPIN, UNREF or SET_CLEAN

Return Value:

    None

--*/

{
    KLOCK_QUEUE_HANDLE LockHandle;
    PSHARED_CACHE_MAP SharedCacheMap;

    DebugTrace(+1, me, "CcUnpinFileData >Bcb = %08lx\n", Bcb );

    //
    // Note, since we have to allocate so many Vacbs, we do not use
    // a node type code. However, the Vacb starts with a BaseAddress,
    // so we assume that the low byte of the Bcb node type code has
    // some bits set, which a page-aligned Base Address cannot.
    //

    ASSERT( (CACHE_NTC_BCB & 0xFF) != 0 );

    if (Bcb->NodeTypeCode != CACHE_NTC_BCB) {

        ASSERT(((PVACB)Bcb >= CcVacbs) && ((PVACB)Bcb < CcBeyondVacbs));
        ASSERT(((PVACB)Bcb)->SharedCacheMap->NodeTypeCode == CACHE_NTC_SHARED_CACHE_MAP);

        CcFreeVirtualAddress( (PVACB)Bcb );

        DebugTrace(-1, me, "CcUnpinFileData -> VOID (simple release)\n", 0 );

        return;
    }

    SharedCacheMap = Bcb->SharedCacheMap;

    //
    // We treat Bcbs as ReadOnly (do not acquire resource) if they
    // are in sections for which we have not disabled modified writing, or
    // in this special case if this action is a dereferencing of the BCB.
    //

    if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) ||
        UnmapAction == UNREF) {
        ReadOnly = TRUE;
    }

    //
    // Synchronize
    //

    KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );

    switch (UnmapAction) {

    case UNPIN:
    case UNREF:

        ASSERT( Bcb->PinCount > 0 );

        Bcb->PinCount -= 1;
        break;

    case SET_CLEAN:

        if (Bcb->Dirty) {

            ULONG Pages = Bcb->ByteLength >> PAGE_SHIFT;

            //
            // Reverse the rest of the actions taken when the Bcb was set dirty.
            //

            Bcb->Dirty = FALSE;

            CcAcquireMasterLockAtDpcLevel();
            CcDeductDirtyPages( SharedCacheMap, Pages );

            //
            // Normally we need to reduce CcPagesYetToWrite appropriately.
            //

            if (CcPagesYetToWrite > Pages) {
                CcPagesYetToWrite -= Pages;
            } else {
                CcPagesYetToWrite = 0;
            }

            //
            // Remove SharedCacheMap from dirty list if nothing more dirty,
            // and someone still has the cache map opened.
            //

            if ((SharedCacheMap->DirtyPages == 0) &&
                (SharedCacheMap->OpenCount != 0)) {

                RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
                InsertTailList( &CcCleanSharedCacheMapList,
                                &SharedCacheMap->SharedCacheMapLinks );
            }

            CcReleaseMasterLockFromDpcLevel();
        }

        break;

    default:
        CcBugCheck( UnmapAction, 0, 0 );
    }

    //
    // If we brought it to 0, then we have to kill it.
    //

    if (Bcb->PinCount == 0) {

        //
        // If the Bcb is Dirty, we only release the resource and unmap now.
        //

        if (Bcb->Dirty) {

            if (Bcb->BaseAddress != NULL) {

                //
                // Unmap the Vacb and free the resource if the Bcb is still
                // dirty. We have to free the resource before dropping the
                // spinlock, and we want to hold the resource until the
                // virtual address is freed.
                //

                CcFreeVirtualAddress( Bcb->Vacb );

                Bcb->BaseAddress = NULL;
                Bcb->Vacb = NULL;
            }

            if (!ReadOnly) {
                ExReleaseResourceLite( &Bcb->Resource );
            }

            KeReleaseInStackQueuedSpinLock( &LockHandle );
        }

        //
        // Otherwise, we also delete the Bcb.
        //

        else {

            //
            // Since CcCalculateVacbLockCount has to be able to walk
            // the BcbList with only the VacbSpinLock, we take that one
            // out to change the list and decrement the level.
            //

            CcAcquireVacbLockAtDpcLevel();
            RemoveEntryList( &Bcb->BcbLinks );

            //
            // For large metadata streams we unlock the Vacb level.
            //

            CcUnlockVacbLevel( SharedCacheMap, Bcb->FileOffset.QuadPart );
            CcReleaseVacbLockFromDpcLevel();

            //
            // Debug routines used to remove Bcbs from the global list
            //

#if LIST_DBG

            KeAcquireQueuedSpinLockAtDpcLevel( KeQueuedSpinLockContext(LockQueueBcbLock) );

            if (Bcb->CcBcbLinks.Flink != NULL) {

                RemoveEntryList( &Bcb->CcBcbLinks );
                CcBcbCount -= 1;
            }

            KeReleaseQueuedSpinLockFromDpcLevel( KeQueuedSpinLockContext(LockQueueBcbLock) );

#endif

            if (Bcb->BaseAddress != NULL) {

                CcFreeVirtualAddress( Bcb->Vacb );
            }
#if DBG
            if (!ReadOnly) {
                ExReleaseResourceLite( &Bcb->Resource );
            }

            //
            // ASSERT that the resource is unowned.
            //

            ASSERT( Bcb->Resource.ActiveCount == 0 );
#endif
            KeReleaseInStackQueuedSpinLock( &LockHandle );
            CcDeallocateBcb( Bcb );
        }
    }

    //
    // Else we just have to release our Shared access, if we are not
    // readonly. We don't need to do this above, since we deallocate
    // the entire Bcb there.
    //

    else {

        if (!ReadOnly) {
            ExReleaseResourceLite( &Bcb->Resource );
        }

        KeReleaseInStackQueuedSpinLock( &LockHandle );
    }

    DebugTrace(-1, me, "CcUnpinFileData -> VOID\n", 0 );

    return;
}

VOID
CcSetReadAheadGranularity (
    IN PFILE_OBJECT FileObject,
    IN ULONG Granularity
    )

/*++

Routine Description:

    This routine may be called to set the read ahead granularity used by
    the Cache Manager. The default is PAGE_SIZE. The number is decremented
    and stored as a mask.

Arguments:

    FileObject - File Object for which granularity shall be set

    Granularity - new granularity, which must be an even power of 2 and
                  >= PAGE_SIZE

Return Value:

    None

--*/

{
    ((PPRIVATE_CACHE_MAP)FileObject->PrivateCacheMap)->ReadAheadMask = Granularity - 1;
}
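
//
// For example, a file system that passes Granularity == 0x10000 (64KB) leaves
// ReadAheadMask == 0xFFFF, and CcScheduleReadAhead below then rounds lengths
// and offsets with expressions such as (Length + Mask) & ~Mask, so a 0x3000-byte
// read is scheduled in 0x10000-byte read ahead units.
//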

VOID
CcScheduleReadAhead (
    IN PFILE_OBJECT FileObject,
    IN PLARGE_INTEGER FileOffset,
    IN ULONG Length
    )

/*++

Routine Description:

    This routine is called by Copy Read and Mdl Read file system routines to
    perform common Read Ahead processing. The input parameters describe
    the current read which has just been completed, or perhaps only started
    in the case of Mdl Reads. Based on these parameters, an
    assessment is made on how much data should be read ahead, and whether
    that data has already been read ahead.

    The processing is divided into two parts:

        CALCULATE READ AHEAD REQUIREMENTS (CcScheduleReadAhead)

        PERFORM READ AHEAD (CcPerformReadAhead)

    File systems should always call CcReadAhead, which will conditionally
    call CcScheduleReadAhead (if the read is large enough). If such a call
    determines that there is read ahead work to do, and no read ahead is
    currently active, then it will set ReadAheadActive and schedule read
    ahead to be performed by the Lazy Writer, who will call CcPerformReadAhead.

Arguments:

    FileObject - supplies pointer to FileObject on which readahead should be
                 considered.

    FileOffset - supplies the FileOffset at which the last read just occurred.

    Length - supplies the length of the last read.

Return Value:

    None

--*/

{
    LARGE_INTEGER NewOffset;
    LARGE_INTEGER NewBeyond;
    LARGE_INTEGER FileOffset1, FileOffset2;
    KIRQL OldIrql;
    PSHARED_CACHE_MAP SharedCacheMap;
    PPRIVATE_CACHE_MAP PrivateCacheMap;
    PWORK_QUEUE_ENTRY WorkQueueEntry;
    ULONG ReadAheadSize;
    LOGICAL Changed = FALSE;

    DebugTrace(+1, me, "CcScheduleReadAhead:\n", 0 );
    DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
                FileOffset->HighPart );
    DebugTrace( 0, me, " Length = %08lx\n", Length );

    SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer
                                            + sizeof(PVOID));
    PrivateCacheMap = FileObject->PrivateCacheMap;

    if ((PrivateCacheMap == NULL) ||
        (SharedCacheMap == NULL) ||
        FlagOn(SharedCacheMap->Flags, DISABLE_READ_AHEAD)) {

        DebugTrace(-1, me, "CcScheduleReadAhead -> VOID (Nooped)\n", 0 );

        return;
    }

    //
    // Round boundaries of transfer up to some greater granularity, so that
    // sequential reads will be recognized even if a few bytes are skipped
    // between records.
    //

    NewOffset = *FileOffset;
    NewBeyond.QuadPart = FileOffset->QuadPart + (LONGLONG)Length;

    //
    // Find the next read ahead boundary beyond the current read.
    //

    ReadAheadSize = (Length + PrivateCacheMap->ReadAheadMask) & ~PrivateCacheMap->ReadAheadMask;
    FileOffset2.QuadPart = NewBeyond.QuadPart + (LONGLONG)ReadAheadSize;
    FileOffset2.LowPart &= ~PrivateCacheMap->ReadAheadMask;
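
    //
    // For example, with a 64KB granularity (ReadAheadMask == 0xFFFF) a 0x1000-byte
    // read ending at offset 0x21000 gives ReadAheadSize == 0x10000, and FileOffset2
    // becomes (0x21000 + 0x10000) & ~0xFFFF == 0x30000 -- the next read ahead
    // boundary beyond the current read.
    //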

    //
    // CALCULATE READ AHEAD REQUIREMENTS
    //

    //
    // Take out the ReadAhead spinlock to synchronize our read ahead decision.
    //

    ExAcquireSpinLock( &PrivateCacheMap->ReadAheadSpinLock, &OldIrql );

    //
    // Read Ahead Case 0.
    //
    // Sequential-only hint in the file object. For this case we will
    // try and always keep two read ahead granularities read ahead from
    // and including the end of the current transfer. This case has the
    // lowest overhead, and the code is completely immune to how the
    // caller skips around. Sequential files use ReadAheadOffset[1] in
    // the PrivateCacheMap as their "high water mark".
    //

    if (FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) {

        //
        // If the next boundary is greater than or equal to the high-water mark,
        // then read ahead.
        //

        if (FileOffset2.QuadPart >= PrivateCacheMap->ReadAheadOffset[1].QuadPart) {

            //
            // On the first read if we are using a large read ahead granularity,
            // and the read did not get it all, we will just get the rest of the
            // first data we want.
            //

            if ((FileOffset->QuadPart == 0)

                    &&

                (PrivateCacheMap->ReadAheadMask > (PAGE_SIZE - 1))

                    &&

                ((Length + PAGE_SIZE - 1) <= PrivateCacheMap->ReadAheadMask)) {

                FileOffset1.QuadPart = (LONGLONG)( ROUND_TO_PAGES(Length) );
                PrivateCacheMap->ReadAheadLength[0] = ReadAheadSize - FileOffset1.LowPart;
                FileOffset2.QuadPart = (LONGLONG)ReadAheadSize;

            //
            // Calculate the next read ahead boundary.
            //

            } else {

                FileOffset1.QuadPart = PrivateCacheMap->ReadAheadOffset[1].QuadPart +
                                       (LONGLONG)ReadAheadSize;

                //
                // If the end of the current read is actually beyond where we would
                // normally do our read ahead, then we have fallen behind, and we must
                // advance to that spot.
                //

                if (FileOffset2.QuadPart > FileOffset1.QuadPart) {
                    FileOffset1 = FileOffset2;
                }
                PrivateCacheMap->ReadAheadLength[0] = ReadAheadSize;
                FileOffset2.QuadPart = FileOffset1.QuadPart + (LONGLONG)ReadAheadSize;
            }

            //
            // Now issue the next two read aheads.
            //

            PrivateCacheMap->ReadAheadOffset[0] = FileOffset1;

            PrivateCacheMap->ReadAheadOffset[1] = FileOffset2;
            PrivateCacheMap->ReadAheadLength[1] = ReadAheadSize;

            Changed = TRUE;
        }

    //
    // Read Ahead Case 1.
    //
    // If this is the third of three sequential reads, then we will see if
    // we can read ahead. Note that if the first read to a file is to
    // offset 0, it passes this test.
    //

    } else if ((NewOffset.HighPart == PrivateCacheMap->BeyondLastByte2.HighPart)

                   &&

               ((NewOffset.LowPart & ~NOISE_BITS)
                == (PrivateCacheMap->BeyondLastByte2.LowPart & ~NOISE_BITS))

                   &&

               (PrivateCacheMap->FileOffset2.HighPart
                == PrivateCacheMap->BeyondLastByte1.HighPart)

                   &&

               ((PrivateCacheMap->FileOffset2.LowPart & ~NOISE_BITS)
                == (PrivateCacheMap->BeyondLastByte1.LowPart & ~NOISE_BITS))) {
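
        //
        // In other words, the current read starts (to within NOISE_BITS) where the
        // previous read ended, and that previous read started where the read before
        // it ended -- three back-to-back reads -- so it is worth scheduling read
        // ahead for this stream.
        //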

        //
        // On the first read if we are using a large read ahead granularity,
        // and the read did not get it all, we will just get the rest of the
        // first data we want.
        //

        if ((FileOffset->QuadPart == 0)

                &&

            (PrivateCacheMap->ReadAheadMask > (PAGE_SIZE - 1))

                &&

            ((Length + PAGE_SIZE - 1) <= PrivateCacheMap->ReadAheadMask)) {

            FileOffset2.QuadPart = (LONGLONG)( ROUND_TO_PAGES(Length) );
        }

        //
        // Round read offset to next read ahead boundary.
        //

        else {
            FileOffset2.QuadPart = NewBeyond.QuadPart + (LONGLONG)ReadAheadSize;

            FileOffset2.LowPart &= ~PrivateCacheMap->ReadAheadMask;
        }

        //
        // Set read ahead length to be the same as for the most recent read,
        // up to our max.
        //

        if (FileOffset2.QuadPart != PrivateCacheMap->ReadAheadOffset[1].QuadPart) {

            ASSERT( FileOffset2.HighPart >= 0 );

            Changed = TRUE;
            PrivateCacheMap->ReadAheadOffset[1] = FileOffset2;
            PrivateCacheMap->ReadAheadLength[1] = ReadAheadSize;
        }
    }

    //
    // Read Ahead Case 2.
    //
    // If this is the third read following a particular stride, then we
    // will see if we can read ahead. One example of an application that
    // might do this is a spreadsheet. Note that this code even works
    // for negative strides.
    //

    else if ( ( NewOffset.QuadPart -
                PrivateCacheMap->FileOffset2.QuadPart ) ==
              ( PrivateCacheMap->FileOffset2.QuadPart -
                PrivateCacheMap->FileOffset1.QuadPart )) {

        //
        // According to the current stride, the next offset will be:
        //
        //     NewOffset + (NewOffset - FileOffset2)
        //
        // which is the same as:
        //
        //     (NewOffset * 2) - FileOffset2
        //

        FileOffset2.QuadPart = ( NewOffset.QuadPart << 1 ) - PrivateCacheMap->FileOffset2.QuadPart;
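
        //
        // For example, reads at offsets 0x10000, 0x30000 and 0x50000 have a constant
        // stride of 0x20000, so the predicted next offset is (0x50000 * 2) - 0x30000
        // == 0x70000; the same arithmetic also predicts backwards (negative) strides.
        //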

        //
        // If our stride is going backwards through the file, we
        // have to detect the case where the next step would wrap.
        //

        if (FileOffset2.HighPart >= 0) {

            //
            // The read ahead length must be extended by the same amount that
            // we will round the PrivateCacheMap->ReadAheadOffset down.
            //

            Length += FileOffset2.LowPart & (PAGE_SIZE - 1);

            //
            // Now round the PrivateCacheMap->ReadAheadOffset down.
            //

            FileOffset2.LowPart &= ~(PAGE_SIZE - 1);
            PrivateCacheMap->ReadAheadOffset[1] = FileOffset2;

            //
            // Round to page boundary.
            //

            PrivateCacheMap->ReadAheadLength[1] = (ULONG) ROUND_TO_PAGES(Length);
            Changed = TRUE;
        }
    }

    //
    // Get out if the ReadAhead requirements did not change.
    //

    if (!Changed || PrivateCacheMap->Flags.ReadAheadActive) {

        DebugTrace( 0, me, "Read ahead already in progress or no change\n", 0 );

        ExReleaseSpinLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql );
        return;
    }

    //
    // Otherwise, we will proceed and try to schedule the read ahead
    // ourselves.
    //

    CC_SET_PRIVATE_CACHE_MAP (PrivateCacheMap, PRIVATE_CACHE_MAP_READ_AHEAD_ACTIVE);

    //
    // Release spin lock on way out
    //

    ExReleaseSpinLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql );

    //
    // Queue the read ahead request to the Lazy Writer's work queue.
    //

    DebugTrace( 0, me, "Queueing read ahead to worker thread\n", 0 );

    WorkQueueEntry = CcAllocateWorkQueueEntry();

    //
    // If we failed to allocate a work queue entry, then we will
    // quietly bag it. Read ahead is only an optimization, and
    // no one ever requires that it occur.
    //

    if (WorkQueueEntry != NULL) {

        //
        // We must reference this file object so that it cannot go away
        // until we finish Read Ahead processing in the Worker Thread.
        //

        ObReferenceObject ( FileObject );

        //
        // Increment open count to make sure the SharedCacheMap stays around.
        //

        CcAcquireMasterLock( &OldIrql );
        CcIncrementOpenCount( SharedCacheMap, 'adRQ' );
        CcReleaseMasterLock( OldIrql );

        WorkQueueEntry->Function = (UCHAR)ReadAhead;
        WorkQueueEntry->Parameters.Read.FileObject = FileObject;

        CcPostWorkQueue( WorkQueueEntry, &CcExpressWorkQueue );
    }

    //
    // If we failed to allocate a Work Queue Entry, or all of the pages
    // are resident, we must set the active flag false.
    //

    else {

        ExAcquireFastLock( &PrivateCacheMap->ReadAheadSpinLock, &OldIrql );
        CC_CLEAR_PRIVATE_CACHE_MAP (PrivateCacheMap, PRIVATE_CACHE_MAP_READ_AHEAD_ACTIVE);
        ExReleaseFastLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql );
    }

    DebugTrace(-1, me, "CcScheduleReadAhead -> VOID\n", 0 );

    return;
}

VOID
FASTCALL
CcPerformReadAhead (
    IN PFILE_OBJECT FileObject
    )

/*++

Routine Description:

    This routine is called by the Lazy Writer to perform read ahead which
    has been scheduled for this file by CcScheduleReadAhead.

Arguments:

    FileObject - supplies pointer to FileObject on which readahead should be
                 considered.

Return Value:

    None

--*/

{
    KIRQL OldIrql;
    PSHARED_CACHE_MAP SharedCacheMap;
    PPRIVATE_CACHE_MAP PrivateCacheMap;
    ULONG i;
    LARGE_INTEGER ReadAheadOffset[2];
    ULONG ReadAheadLength[2];
    PCACHE_MANAGER_CALLBACKS Callbacks;
    PVOID Context;
    ULONG SavedState;
    LOGICAL Done;
    LOGICAL HitEof = FALSE;
    LOGICAL ReadAheadPerformed = FALSE;
    ULONG FaultOccurred = 0;
    PETHREAD Thread = PsGetCurrentThread();
    PVACB Vacb = NULL;

    LOGICAL ResourceHeld = FALSE;

    DebugTrace(+1, me, "CcPerformReadAhead:\n", 0 );
    DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );

    MmSavePageFaultReadAhead( Thread, &SavedState );

    try {

        //
        // Since we have the open count biased, we can safely access the
        // SharedCacheMap.
        //

        SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;

        Callbacks = SharedCacheMap->Callbacks;
        Context = SharedCacheMap->LazyWriteContext;

        //
        // After the first time, keep looping as long as there are new
        // read ahead requirements. (We will skip out below.)
        //

        while (TRUE) {

            //
            // Get SharedCacheMap and PrivateCacheMap. If either are now NULL, get
            // out.
            //

            CcAcquireMasterLock( &OldIrql );

            PrivateCacheMap = FileObject->PrivateCacheMap;

            //
            // Now capture the information that we need, so that we can drop the
            // SharedList Resource. This information is advisory only anyway, and
            // the caller must guarantee that the FileObject is referenced.
            //

            if (PrivateCacheMap != NULL) {

                ExAcquireSpinLockAtDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );

                //
                // We are done when the lengths are 0
                //

                Done = ((PrivateCacheMap->ReadAheadLength[0] |
                         PrivateCacheMap->ReadAheadLength[1]) == 0);

                ReadAheadOffset[0] = PrivateCacheMap->ReadAheadOffset[0];
                ReadAheadOffset[1] = PrivateCacheMap->ReadAheadOffset[1];
                ReadAheadLength[0] = PrivateCacheMap->ReadAheadLength[0];
                ReadAheadLength[1] = PrivateCacheMap->ReadAheadLength[1];
                PrivateCacheMap->ReadAheadLength[0] = 0;
                PrivateCacheMap->ReadAheadLength[1] = 0;

                ExReleaseSpinLockFromDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );
            }

            CcReleaseMasterLock( OldIrql );

            //
            // Acquire the file shared.
            //

            ResourceHeld = (*Callbacks->AcquireForReadAhead)( Context, TRUE );

            if ((PrivateCacheMap == NULL) || Done || !ResourceHeld) {

                try_return( NOTHING );
            }

            //
            // PERFORM READ AHEAD
            //
            //
            // Now loop until everything is read in. The Read ahead is accomplished
            // by touching the pages with an appropriate ReadAhead parameter in MM.
            //

            i = 0;

            do {

                LARGE_INTEGER Offset, SavedOffset;
                ULONG Length, SavedLength;

                Offset = ReadAheadOffset[i];
                Length = ReadAheadLength[i];
                SavedOffset = Offset;
                SavedLength = Length;

                if ((Length != 0)

                        &&

                    ( Offset.QuadPart <= SharedCacheMap->FileSize.QuadPart )) {

                    ReadAheadPerformed = TRUE;

                    //
                    // Keep length within file and MAX_READ_AHEAD
                    //

                    if ( ( Offset.QuadPart + (LONGLONG)Length ) >= SharedCacheMap->FileSize.QuadPart ) {

                        Length = (ULONG)( SharedCacheMap->FileSize.QuadPart - Offset.QuadPart );
                        HitEof = TRUE;

                    }
                    if (Length > MAX_READ_AHEAD) {
                        Length = MAX_READ_AHEAD;
                    }

                    //
                    // Now loop to read all of the desired data in. This loop
                    // is more or less like the same loop to read data in
                    // CcCopyRead, except that we do not copy anything, just
                    // unmap as soon as it is in.
                    //

                    while (Length != 0) {

                        ULONG ReceivedLength;
                        PVOID CacheBuffer;
                        ULONG PagesToGo;

                        //
                        // Call local routine to Map or Access the file data.
                        // If we cannot map the data because of a Wait condition,
                        // return FALSE.
                        //
                        // Since this routine is intended to be called from
                        // the finally handler from file system read modules,
                        // it is imperative that it not raise any exceptions.
                        // Therefore, if any expected exception is raised, we
                        // will simply get out.
                        //

                        CacheBuffer = CcGetVirtualAddress( SharedCacheMap,
                                                           Offset,
                                                           &Vacb,
                                                           &ReceivedLength );

                        //
                        // If we got more than we need, make sure to only transfer
                        // the right amount.
                        //

                        if (ReceivedLength > Length) {
                            ReceivedLength = Length;
                        }

                        //
                        // Now loop to touch all of the pages, calling MM to insure
                        // that if we fault, we take in exactly the number of pages
                        // we need.
                        //

                        PagesToGo = ADDRESS_AND_SIZE_TO_SPAN_PAGES( CacheBuffer,
                                                                    ReceivedLength );

                        CcMissCounter = &CcReadAheadIos;

                        while (PagesToGo) {

                            MmSetPageFaultReadAhead( Thread, (PagesToGo - 1) );
                            FaultOccurred |= !MmCheckCachedPageState(CacheBuffer, FALSE);

                            CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE;
                            PagesToGo -= 1;
                        }
                        CcMissCounter = &CcThrowAway;

                        //
                        // Calculate how much data we have left to go.
                        //

                        Length -= ReceivedLength;

                        //
                        // Assume we did not get all the data we wanted, and set
                        // Offset to the end of the returned data.
                        //

                        Offset.QuadPart = Offset.QuadPart + (LONGLONG)ReceivedLength;

                        //
                        // It was only a page, so we can just leave this loop
                        // after freeing the address.
                        //

                        CcFreeVirtualAddress( Vacb );
                        Vacb = NULL;
                    }
                }
                i += 1;
            } while (i <= 1);

            //
            // Release the file
            //

            (*Callbacks->ReleaseFromReadAhead)( Context );
            ResourceHeld = FALSE;
        }

    try_exit: NOTHING;
    }
    finally {

        MmResetPageFaultReadAhead(Thread, SavedState);
        CcMissCounter = &CcThrowAway;

        //
        // If we got an error faulting a single page in, release the Vacb
        // here. It is important to free any mapping before dropping the
        // resource to prevent purge problems.
        //

        if (Vacb != NULL) {
            CcFreeVirtualAddress( Vacb );
        }

        //
        // Release the file
        //

        if (ResourceHeld) {
            (*Callbacks->ReleaseFromReadAhead)( Context );
        }

        //
        // To show we are done, we must make sure the PrivateCacheMap is
        // still there.
        //

        CcAcquireMasterLock( &OldIrql );

        PrivateCacheMap = FileObject->PrivateCacheMap;

        //
        // Show readahead is going inactive.
        //

        if (PrivateCacheMap != NULL) {

            ExAcquireSpinLockAtDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );
            CC_CLEAR_PRIVATE_CACHE_MAP (PrivateCacheMap, PRIVATE_CACHE_MAP_READ_AHEAD_ACTIVE);

            //
            // If he said sequential only and we smashed into Eof, then
            // let's reset the highwater mark in case he wants to read the
            // file sequentially again.
            //

            if (HitEof && FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) {
                PrivateCacheMap->ReadAheadOffset[1].LowPart =
                PrivateCacheMap->ReadAheadOffset[1].HighPart = 0;
            }

            //
            // If no faults occurred, turn read ahead off.
            //

            if (ReadAheadPerformed && !FaultOccurred) {
                CC_CLEAR_PRIVATE_CACHE_MAP (PrivateCacheMap, PRIVATE_CACHE_MAP_READ_AHEAD_ENABLED);
            }

            ExReleaseSpinLockFromDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );
        }

        //
        // Free SharedCacheMap list
        //

        CcReleaseMasterLock( OldIrql );

        ObDereferenceObject( FileObject );

        //
        // Serialize again to decrement the open count.
        //

        CcAcquireMasterLock( &OldIrql );

        CcDecrementOpenCount( SharedCacheMap, 'adRP' );

        if ((SharedCacheMap->OpenCount == 0) &&
            !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
            (SharedCacheMap->DirtyPages == 0)) {

            //
            // Move to the dirty list.
            //

            RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
            InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
                            &SharedCacheMap->SharedCacheMapLinks );

            //
            // Make sure the Lazy Writer will wake up, because we
            // want him to delete this SharedCacheMap.
            //

            LazyWriter.OtherWork = TRUE;
            if (!LazyWriter.ScanActive) {
                CcScheduleLazyWriteScan( FALSE );
            }
        }

        CcReleaseMasterLock( OldIrql );
    }

    DebugTrace(-1, me, "CcPerformReadAhead -> VOID\n", 0 );

    return;
}

PBITMAP_RANGE
CcFindBitmapRangeToDirty (
    IN PMBCB Mbcb,
    IN LONGLONG Page,
    IN PULONG *FreePageForSetting
    )

/*++

Routine Description:

    This routine looks for the bitmap range containing the specified page.
    If it is found it is returned so the caller can set some dirty bits.
    If it is not found, then an attempt is made to come up with a free range
    and set it up to describe the desired range. To come up with a free range,
    first we attempt to recycle the lowest range that does not currently contain
    any dirty pages. If there is no such range, then we allocate one.

Arguments:

    Mbcb - Supplies the Mbcb in which to find the range.

    Page - Supplies the page number for the first page to be set dirty.

    FreePageForSetting - Supplies a free bitmap page of zeros from the zone; the
                         caller's pointer is cleared on return if this page is used.

Return Value:

    The desired bitmap range, or NULL if one could not be allocated.

Environment:

    The BcbSpinLock must be held on entry.

--*/

{
    PBITMAP_RANGE BitmapRange, FreeRange;
    PLIST_ENTRY InsertPoint;
    LONGLONG BasePage;

    //
    // Initialize FreeRange and InsertPoint for the case we have
    // to initialize a range.
    //

    FreeRange = NULL;
    InsertPoint = &Mbcb->BitmapRanges;

    //
    // Point to the first bitmap range.
    //

    BitmapRange = (PBITMAP_RANGE)InsertPoint->Flink;

    //
    // Calculate the desired BasePage from the caller's page.
    //

    BasePage = (Page & ~(LONGLONG)((MBCB_BITMAP_BLOCK_SIZE * 8) - 1));
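
    //
    // In other words, each bitmap block describes (MBCB_BITMAP_BLOCK_SIZE * 8)
    // pages (one bit per page), so BasePage is Page rounded down to a multiple
    // of that count. If, say, a block were 0x80 bytes, each range would cover
    // 0x400 pages, and Page 0x567 would fall in the range whose BasePage is 0x400.
    //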
|
||
|
||
//
|
||
// Loop through the list until we find the range or we have a free range
|
||
// and correct insertion point.
|
||
//
|
||
|
||
do {
|
||
|
||
//
|
||
// If we get an exact match, then we must have hit a fully-initialized
|
||
// range which we can return.
|
||
//
|
||
|
||
if (BasePage == BitmapRange->BasePage) {
|
||
return BitmapRange;
|
||
|
||
//
|
||
// Otherwise, see if the range is free and we have not captured a
|
||
// free range yet.
|
||
//
|
||
|
||
} else if ((BitmapRange->DirtyPages == 0) && (FreeRange == NULL)) {
|
||
FreeRange = BitmapRange;
|
||
|
||
//
|
||
// If we did not capture a free range, see if we need to update our
|
||
// insertion point.
|
||
//
|
||
|
||
} else if (BasePage > BitmapRange->BasePage) {
|
||
InsertPoint = &BitmapRange->Links;
|
||
}
|
||
|
||
//
|
||
// Advance to the next range (or possibly back to the listhead).
|
||
//
|
||
|
||
BitmapRange = (PBITMAP_RANGE)BitmapRange->Links.Flink;
|
||
|
||
//
|
||
// Loop until we hit the end, or we know we are done updating both InsertPoint
|
||
// and FreeRange.
|
||
//
|
||
|
||
} while ((BitmapRange != (PBITMAP_RANGE)&Mbcb->BitmapRanges) &&
|
||
((BasePage >= BitmapRange->BasePage) ||
|
||
(FreeRange == NULL)));
|
||
|
||
//
|
||
// If we found a FreeRange we can use, then remove it from the list.
|
||
//
|
||
|
||
if (FreeRange != NULL) {
|
||
RemoveEntryList( &FreeRange->Links );
|
||
|
||
//
|
||
// Otherwise we have to allocate the small bitmap range structure. We usually
|
||
// try to avoid calling the pool package while owning a spin lock, but note the
|
||
// following things which must be true if we hit this point:
|
||
//
|
||
// The file is larger than 3 bitmap ranges (normally 384MB on Intel).
|
||
// Three ranges plus all previously allocated ranges are simultaneously dirty.
|
||
//
|
||
// The second point is fairly unlikely, especially for a sequential writer. It
|
||
// can occur for a random writer in a large file, but eventually we will allocate
|
||
// enough ranges to always describe how many ranges he can keep dirty at once!
|
||
//
|
||
|
||
} else {
|
||
FreeRange = ExAllocatePoolWithTag( NonPagedPool, sizeof(BITMAP_RANGE), 'rBcC' );
|
||
if (FreeRange == NULL) {
|
||
return NULL;
|
||
}
|
||
RtlZeroMemory( FreeRange, sizeof(BITMAP_RANGE) );
|
||
}
|
||
|
||
//
|
||
// Insert and initialize.
|
||
//
|
||
|
||
InsertHeadList( InsertPoint, &FreeRange->Links );
|
||
FreeRange->BasePage = BasePage;
|
||
FreeRange->FirstDirtyPage = MAXULONG;
|
||
FreeRange->LastDirtyPage = 0;
|
||
|
||
//
|
||
// If the range does not have a bitmap yet, then consume the one we were passed
|
||
// in.
|
||
//
|
||
|
||
if (FreeRange->Bitmap == NULL) {
|
||
ASSERT(*FreePageForSetting != NULL);
|
||
FreeRange->Bitmap = *FreePageForSetting;
|
||
*FreePageForSetting = NULL;
|
||
}
|
||
|
||
return FreeRange;
|
||
}
|
||
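//
//  The bitmap arithmetic used above (and again in CcSetDirtyInMask) can be
//  summarized by the following illustrative sketch, excluded from compilation.
//  It is not part of this module; the helper name, the SKETCH_BIT_POSITION
//  type and the PagesPerRange parameter are hypothetical stand-ins for the
//  MBCB_BITMAP_BLOCK_SIZE * 8 pages covered by one bitmap range.
//

#if 0

typedef struct _SKETCH_BIT_POSITION {
    long long BasePage;             //  first page covered by the bitmap range
    unsigned long Index;            //  index of the ULONG within the bitmap
    unsigned long Mask;             //  single bit for the page within that ULONG
} SKETCH_BIT_POSITION;

static SKETCH_BIT_POSITION
SketchDecomposePage (
    long long Page,
    long long PagesPerRange         //  assumed to be a power of two
    )
{
    SKETCH_BIT_POSITION Position;

    //
    //  Round down to a multiple of PagesPerRange, exactly like the BasePage
    //  calculation in CcFindBitmapRangeToDirty.
    //

    Position.BasePage = Page & ~(PagesPerRange - 1);

    //
    //  Locate the bit for this page, mirroring the MaskPtr/Mask arithmetic
    //  used when the dirty bits are actually set.
    //

    Position.Index = (unsigned long)(Page - Position.BasePage) / 32;
    Position.Mask = 1UL << ((unsigned long)(Page - Position.BasePage) % 32);

    return Position;
}

#endif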
|
||
|
||
PBITMAP_RANGE
|
||
CcFindBitmapRangeToClean (
|
||
IN PMBCB Mbcb,
|
||
IN LONGLONG Page
|
||
)
|
||
|
||
/*++

Routine Description:

    This routine starts from the specified page, and looks for a range with dirty
    pages.  The caller must guarantee that some range exists with dirty pages.  If
    the end of the ranges is hit before finding any dirty ranges, then this routine
    loops back to the start of the range list.

Arguments:

    Mbcb - Supplies the Mbcb in which to find the range.

    Page - Supplies the page number for the first page to scan from.

Return Value:

    The desired bitmap range with dirty pages.

Environment:

    The BcbSpinLock must be held on entry.

--*/
|
||
|
||
{
|
||
PBITMAP_RANGE BitmapRange;
|
||
|
||
//
|
||
// Point to the first bitmap range.
|
||
//
|
||
|
||
BitmapRange = (PBITMAP_RANGE)Mbcb->BitmapRanges.Flink;
|
||
|
||
//
|
||
// Loop through the list until we find the range to return.
|
||
//
|
||
|
||
do {
|
||
|
||
//
|
||
// If we hit the listhead, then wrap to find the first dirty range.
|
||
//
|
||
|
||
if (BitmapRange == (PBITMAP_RANGE)&Mbcb->BitmapRanges) {
|
||
|
||
//
|
||
// If Page is already 0, we are in an infinite loop.
|
||
//
|
||
|
||
ASSERT(Page != 0);
|
||
|
||
//
|
||
// Clear Page and fall through to advance to first range.
|
||
//
|
||
|
||
Page = 0;
|
||
|
||
|
||
//
|
||
// Otherwise, if we are in range, return the first range
|
||
// with dirty pages.
|
||
//
|
||
|
||
} else if ((Page <= (BitmapRange->BasePage + BitmapRange->LastDirtyPage)) &&
|
||
(BitmapRange->DirtyPages != 0)) {
|
||
return BitmapRange;
|
||
}
|
||
|
||
//
|
||
// Advance to the next range (or possibly back to the listhead).
|
||
//
|
||
|
||
BitmapRange = (PBITMAP_RANGE)BitmapRange->Links.Flink;
|
||
|
||
} while (TRUE);
|
||
}
|
||
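//
//  Illustrative sketch, excluded from compilation, of the wrap-around scan
//  performed by CcFindBitmapRangeToClean above.  The SKETCH_RANGE type and
//  helper name are hypothetical; the real routine walks LIST_ENTRY links with
//  the listhead embedded in the Mbcb.
//

#if 0

typedef struct _SKETCH_RANGE {
    struct _SKETCH_RANGE *Next;     //  circular list which includes the listhead
    long long BasePage;
    unsigned long LastDirtyPage;
    unsigned long DirtyPages;
} SKETCH_RANGE;

static SKETCH_RANGE *
SketchFindRangeToClean (
    SKETCH_RANGE *ListHead,         //  dummy head, not a real range
    long long Page
    )
{
    SKETCH_RANGE *Range = ListHead->Next;

    //
    //  As in the real routine, the caller must guarantee that at least one
    //  range has dirty pages, otherwise this loop would not terminate.
    //

    for (;;) {

        if (Range == ListHead) {

            //
            //  Hit the end of the list - wrap by clearing Page and falling
            //  through to advance to the first range again.
            //

            Page = 0;

        } else if ((Page <= (Range->BasePage + Range->LastDirtyPage)) &&
                   (Range->DirtyPages != 0)) {

            return Range;
        }

        Range = Range->Next;
    }
}

#endif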
|
||
|
||
VOID
|
||
CcSetDirtyInMask (
|
||
IN PSHARED_CACHE_MAP SharedCacheMap,
|
||
IN PLARGE_INTEGER FileOffset,
|
||
IN ULONG Length
|
||
)
|
||
|
||
/*++

Routine Description:

    This routine may be called to set a range of pages dirty in a user data
    file, by just setting the corresponding bits in the mask bcb.

    IMPORTANT NOTE:

        If this routine fails to set any bits due to an allocation failure,
        it just returns quietly without informing the caller.  (Note that this
        routine is never called for no modified write sections.)  The reason
        for this behavior is that this routine is sometimes called as part of
        error recovery (CcFreeActiveVacb, CcMdlWriteComplete, etc.) when it is
        essential to just keep on moving.  Note that if an allocation failure does
        occur, this only means that MM will have to flush the modified page in
        time, since the Lazy Writer will not do it.

Arguments:

    SharedCacheMap - SharedCacheMap where the pages are to be set dirty.

    FileOffset - FileOffset of first page to set dirty

    Length - Used in conjunction with FileOffset to determine how many pages
             to set dirty.

Return Value:

    None

--*/
|
||
|
||
{
|
||
KLOCK_QUEUE_HANDLE LockHandle;
|
||
PMBCB Mbcb;
|
||
PBITMAP_RANGE BitmapRange;
|
||
LONGLONG FirstPage;
|
||
LONGLONG LastPage;
|
||
PULONG MaskPtr;
|
||
ULONG Mask = 0;
|
||
PULONG Bitmap = NULL;
|
||
|
||
//
|
||
// We assume no caller can cross a bitmap range boundary (currently not even
|
||
// a view boundary!), so we do not want to loop through bitmap ranges.
|
||
//
|
||
|
||
ASSERT((FileOffset->QuadPart / MBCB_BITMAP_RANGE) ==
|
||
((FileOffset->QuadPart + Length - 1) / MBCB_BITMAP_RANGE));
|
||
|
||
//
|
||
// Initialize our locals.
|
||
//
|
||
|
||
FirstPage = FileOffset->QuadPart >> PAGE_SHIFT;
|
||
LastPage = ((FileOffset->QuadPart + Length - 1) >> PAGE_SHIFT);
|
||
|
||
//
|
||
    // PREfix correctly notes that if the Mbcb grande promotion test and the one
    // that decides to preallocate the bitmap buffer ever disagree, we will
    // be able to have a NULL Bitmap and die.  This will not happen since we
    // guarantee that section size >= filesize.  Assert this case, and we will
    // also assert that Bitmap is never NULL when needed - this should convince
    // PREfix we're OK.
|
||
//
|
||
|
||
ASSERT( (SharedCacheMap->SectionSize.QuadPart / PAGE_SIZE) > LastPage );
|
||
|
||
//
|
||
// If we have to convert to an Mbcb grande, we will loop back here to
|
||
// preallocate another buffer.
|
||
//
|
||
|
||
do {
|
||
|
||
//
|
||
        // For large streams, we need to preallocate a block we use for
        // bitmaps.  We allocate one, then loop back in the rare
        // case where we will need another.  We free it at the bottom if we
        // don't need one.
|
||
//
|
||
|
||
if (SharedCacheMap->SectionSize.QuadPart > (MBCB_BITMAP_INITIAL_SIZE * 8 * PAGE_SIZE)) {
|
||
|
||
//
|
||
// If we could not preallocate, break out into common cleanup code and
|
||
// return quietly.
|
||
//
|
||
|
||
if (!CcPrefillVacbLevelZone( 1, &LockHandle.OldIrql, FALSE )) {
|
||
return;
|
||
}
|
||
|
||
Bitmap = (PULONG)CcAllocateVacbLevel( FALSE );
|
||
CcReleaseVacbLock( LockHandle.OldIrql );
|
||
}
|
||
|
||
//
|
||
// Acquire the Mbcb spinlock.
|
||
//
|
||
|
||
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
|
||
//
|
||
// If there is no Mbcb, we will have to allocate one.
|
||
//
|
||
|
||
Mbcb = SharedCacheMap->Mbcb;
|
||
if (Mbcb == NULL) {
|
||
|
||
//
|
||
// Since we use the Bcb zone, we must assume that Bcbs are big enough.
|
||
//
|
||
|
||
ASSERT(QuadAlign(sizeof(MBCB)) <= QuadAlign(sizeof(BCB)));
|
||
|
||
//
|
||
// Allocate the Mbcb from the Bcb zone.
|
||
//
|
||
|
||
Mbcb = (PMBCB)CcAllocateInitializeBcb( NULL, NULL, NULL, NULL );
|
||
|
||
//
|
||
// If we could not allocate an Mbcb, break out to clean up and return
|
||
//
|
||
|
||
if (Mbcb == NULL) {
|
||
break;
|
||
}
|
||
|
||
//
|
||
// Set in the node type, and initialize the listhead of ranges.
|
||
//
|
||
|
||
Mbcb->NodeTypeCode = CACHE_NTC_MBCB;
|
||
InitializeListHead( &Mbcb->BitmapRanges );
|
||
|
||
//
|
||
// Insert and initialize the first range.
|
||
//
|
||
|
||
InsertTailList( &Mbcb->BitmapRanges, &Mbcb->BitmapRange1.Links );
|
||
Mbcb->BitmapRange1.FirstDirtyPage = MAXULONG;
|
||
|
||
//
|
||
// Use the rest of the Mbcb as the initial bitmap.
|
||
//
|
||
|
||
Mbcb->BitmapRange1.Bitmap = (PULONG)&Mbcb->BitmapRange2;
|
||
|
||
//
|
||
// Now set to use our new Mbcb.
|
||
//
|
||
|
||
SharedCacheMap->Mbcb = Mbcb;
|
||
}
|
||
|
||
//
|
||
// Now see if we need to switch to the Mbcb grande format.
|
||
//
|
||
|
||
if ((LastPage >= (MBCB_BITMAP_INITIAL_SIZE * 8)) &&
|
||
(Mbcb->NodeTypeCode != CACHE_NTC_MBCB_GRANDE)) {
|
||
|
||
ASSERT( Bitmap != NULL );
|
||
|
||
//
|
||
// If there are any dirty pages, copy the initial bitmap over, and zero
|
||
// out the original end of the Mbcb for reuse.
|
||
//
|
||
|
||
if (Mbcb->BitmapRange1.DirtyPages != 0) {
|
||
RtlCopyMemory( Bitmap, Mbcb->BitmapRange1.Bitmap, MBCB_BITMAP_INITIAL_SIZE );
|
||
RtlZeroMemory( Mbcb->BitmapRange1.Bitmap, MBCB_BITMAP_INITIAL_SIZE );
|
||
}
|
||
|
||
//
|
||
// Store the new bitmap pointer and show we have consumed this one.
|
||
//
|
||
|
||
Mbcb->BitmapRange1.Bitmap = Bitmap;
|
||
Bitmap = NULL;
|
||
|
||
//
|
||
            // Insert and initialize the second and third ranges.
|
||
//
|
||
|
||
InsertTailList( &Mbcb->BitmapRanges, &Mbcb->BitmapRange2.Links );
|
||
Mbcb->BitmapRange2.BasePage = MAXLONGLONG;
|
||
Mbcb->BitmapRange2.FirstDirtyPage = MAXULONG;
|
||
InsertTailList( &Mbcb->BitmapRanges, &Mbcb->BitmapRange3.Links );
|
||
Mbcb->BitmapRange3.BasePage = MAXLONGLONG;
|
||
Mbcb->BitmapRange3.FirstDirtyPage = MAXULONG;
|
||
Mbcb->NodeTypeCode = CACHE_NTC_MBCB_GRANDE;
|
||
|
||
//
|
||
// This is a one-time event - converting to the large Mbcb. Continue back
|
||
// to preallocate another buffer for CcFindBitmapRangeToDirty.
|
||
//
|
||
|
||
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
||
continue;
|
||
}
|
||
|
||
//
|
||
// Now find the Bitmap range we are setting bits in.
|
||
//
|
||
|
||
BitmapRange = CcFindBitmapRangeToDirty( Mbcb, FirstPage, &Bitmap );
|
||
|
||
//
|
||
// If we could not allocate this dinky structure, break out quietly.
|
||
//
|
||
|
||
if (BitmapRange == NULL) {
|
||
break;
|
||
}
|
||
|
||
//
|
||
// Now update the first and last dirty page indices and the bitmap.
|
||
//
|
||
|
||
if (FirstPage < (BitmapRange->BasePage + BitmapRange->FirstDirtyPage)) {
|
||
BitmapRange->FirstDirtyPage = (ULONG)(FirstPage - BitmapRange->BasePage);
|
||
}
|
||
|
||
if (LastPage > (BitmapRange->BasePage + BitmapRange->LastDirtyPage)) {
|
||
BitmapRange->LastDirtyPage = (ULONG)(LastPage - BitmapRange->BasePage);
|
||
}
|
||
|
||
//
|
||
// We have to acquire the shared cache map list, because we
|
||
// may be changing lists.
|
||
//
|
||
|
||
CcAcquireMasterLockAtDpcLevel();
|
||
|
||
//
|
||
// If this is the first dirty page for this cache map, there is some work
|
||
// to do.
|
||
//
|
||
|
||
if (SharedCacheMap->DirtyPages == 0) {
|
||
|
||
//
|
||
// If the lazy write scan is not active, then start it.
|
||
//
|
||
|
||
if (!LazyWriter.ScanActive) {
|
||
CcScheduleLazyWriteScan( FALSE );
|
||
}
|
||
|
||
//
|
||
// Move to the dirty list.
|
||
//
|
||
|
||
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
||
InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
|
||
&SharedCacheMap->SharedCacheMapLinks );
|
||
|
||
Mbcb->ResumeWritePage = FirstPage;
|
||
}
|
||
|
||
MaskPtr = &BitmapRange->Bitmap[(ULONG)(FirstPage - BitmapRange->BasePage) / 32];
|
||
Mask = 1 << ((ULONG)FirstPage % 32);
|
||
|
||
//
|
||
// Loop to set all of the bits and adjust the DirtyPage totals.
|
||
//
|
||
|
||
for ( ; FirstPage <= LastPage; FirstPage++) {
|
||
|
||
if ((*MaskPtr & Mask) == 0) {
|
||
CcChargeMaskDirtyPages( SharedCacheMap, Mbcb, BitmapRange, 1 );
|
||
*MaskPtr |= Mask;
|
||
}
|
||
|
||
Mask <<= 1;
|
||
|
||
if (Mask == 0) {
|
||
|
||
MaskPtr += 1;
|
||
Mask = 1;
|
||
}
|
||
}
|
||
|
||
//
|
||
// See if we need to advance our goal for ValidDataLength.
|
||
//
|
||
|
||
LastPage = FileOffset->QuadPart + Length;
|
||
|
||
if (LastPage > SharedCacheMap->ValidDataGoal.QuadPart) {
|
||
SharedCacheMap->ValidDataGoal.QuadPart = (LONGLONG)LastPage;
|
||
}
|
||
|
||
CcReleaseMasterLockFromDpcLevel();
|
||
|
||
//
|
||
// Continue until we have actually set the bits (there is a continue
|
||
// which just wants to loop back and allocate another buffer).
|
||
//
|
||
|
||
} while (Mask == 0);
|
||
|
||
//
|
||
// Now if we preallocated a bitmap buffer, free it on the way out.
|
||
//
|
||
|
||
if (Bitmap != NULL) {
|
||
CcAcquireVacbLockAtDpcLevel();
|
||
CcDeallocateVacbLevel( (PVACB *)Bitmap, FALSE );
|
||
CcReleaseVacbLockFromDpcLevel();
|
||
}
|
||
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
||
}
|
||
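//
//  Illustrative sketch, excluded from compilation, of the bit-setting loop at
//  the heart of CcSetDirtyInMask above: set a run of pages dirty in a ULONG
//  bitmap, crossing long word boundaries, and count only the bits that were
//  not already set.  The helper name and parameters are hypothetical.
//

#if 0

static unsigned long
SketchSetDirtyRun (
    unsigned long *Bitmap,          //  bitmap for one range, bit 0 == BasePage
    unsigned long FirstPage,        //  first page offset within the range
    unsigned long LastPage          //  last page offset within the range
    )
{
    unsigned long *MaskPtr = &Bitmap[FirstPage / 32];
    unsigned long Mask = 1UL << (FirstPage % 32);
    unsigned long NewlyDirty = 0;

    for ( ; FirstPage <= LastPage; FirstPage += 1) {

        //
        //  Only charge for pages that were previously clean.
        //

        if ((*MaskPtr & Mask) == 0) {
            *MaskPtr |= Mask;
            NewlyDirty += 1;
        }

        //
        //  Advance to the next bit, moving to the next ULONG on overflow.
        //

        Mask <<= 1;

        if (Mask == 0) {
            MaskPtr += 1;
            Mask = 1;
        }
    }

    return NewlyDirty;
}

#endif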
|
||
|
||
VOID
|
||
CcSetDirtyPinnedData (
|
||
IN PVOID BcbVoid,
|
||
IN PLARGE_INTEGER Lsn OPTIONAL
|
||
)
|
||
|
||
/*++

Routine Description:

    This routine may be called to set a Bcb (returned by CcPinFileData)
    dirty, and a candidate for the Lazy Writer.  All Bcbs should be set
    dirty by calling this routine, even if they are to be flushed
    another way.

Arguments:

    Bcb - Supplies a pointer to a pinned (by CcPinFileData) Bcb, to
          be set dirty.

    Lsn - Lsn to be remembered with page.

Return Value:

    None

--*/
|
||
|
||
{
|
||
PBCB Bcbs[2];
|
||
PBCB *BcbPtrPtr;
|
||
KLOCK_QUEUE_HANDLE LockHandle;
|
||
PSHARED_CACHE_MAP SharedCacheMap;
|
||
|
||
DebugTrace(+1, me, "CcSetDirtyPinnedData: Bcb = %08lx\n", BcbVoid );
|
||
|
||
//
|
||
// Assume this is a normal Bcb, and set up for loop below.
|
||
//
|
||
|
||
Bcbs[0] = (PBCB)BcbVoid;
|
||
Bcbs[1] = NULL;
|
||
BcbPtrPtr = &Bcbs[0];
|
||
|
||
//
|
||
// If it is an overlap Bcb, then point into the Bcb vector
|
||
// for the loop.
|
||
//
|
||
|
||
if (Bcbs[0]->NodeTypeCode == CACHE_NTC_OBCB) {
|
||
BcbPtrPtr = &((POBCB)Bcbs[0])->Bcbs[0];
|
||
}
|
||
|
||
//
|
||
// Loop to set all Bcbs dirty
|
||
//
|
||
|
||
while (*BcbPtrPtr != NULL) {
|
||
|
||
Bcbs[0] = *(BcbPtrPtr++);
|
||
|
||
//
|
||
// Should be no ReadOnly Bcbs
|
||
//
|
||
|
||
ASSERT(((ULONG_PTR)Bcbs[0] & 1) != 1);
|
||
|
||
SharedCacheMap = Bcbs[0]->SharedCacheMap;
|
||
|
||
//
|
||
// We have to acquire the shared cache map list, because we
|
||
// may be changing lists.
|
||
//
|
||
|
||
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
|
||
if (!Bcbs[0]->Dirty) {
|
||
|
||
ULONG Pages = Bcbs[0]->ByteLength >> PAGE_SHIFT;
|
||
|
||
//
|
||
// Set dirty to keep the Bcb from going away until
|
||
// it is set Undirty, and assign the next modification time stamp.
|
||
//
|
||
|
||
Bcbs[0]->Dirty = TRUE;
|
||
|
||
//
|
||
// Initialize the OldestLsn field.
|
||
//
|
||
|
||
if (ARGUMENT_PRESENT(Lsn)) {
|
||
Bcbs[0]->OldestLsn = *Lsn;
|
||
Bcbs[0]->NewestLsn = *Lsn;
|
||
}
|
||
|
||
//
|
||
// Move it to the dirty list if these are the first dirty pages,
|
||
// and this is not disabled for write behind.
|
||
//
|
||
// Increase the count of dirty bytes in the shared cache map.
|
||
//
|
||
|
||
CcAcquireMasterLockAtDpcLevel();
|
||
if ((SharedCacheMap->DirtyPages == 0) &&
|
||
!FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) {
|
||
|
||
//
|
||
// If the lazy write scan is not active, then start it.
|
||
//
|
||
|
||
if (!LazyWriter.ScanActive) {
|
||
CcScheduleLazyWriteScan( FALSE );
|
||
}
|
||
|
||
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
||
InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
|
||
&SharedCacheMap->SharedCacheMapLinks );
|
||
}
|
||
|
||
CcChargePinDirtyPages( SharedCacheMap, Pages );
|
||
CcReleaseMasterLockFromDpcLevel();
|
||
}
|
||
|
||
//
|
||
// If this Lsn happens to be older/newer than the ones we have stored, then
|
||
// change it.
|
||
//
|
||
|
||
if (ARGUMENT_PRESENT(Lsn)) {
|
||
|
||
if ((Bcbs[0]->OldestLsn.QuadPart == 0) || (Lsn->QuadPart < Bcbs[0]->OldestLsn.QuadPart)) {
|
||
Bcbs[0]->OldestLsn = *Lsn;
|
||
}
|
||
|
||
if (Lsn->QuadPart > Bcbs[0]->NewestLsn.QuadPart) {
|
||
Bcbs[0]->NewestLsn = *Lsn;
|
||
}
|
||
}
|
||
|
||
//
|
||
// See if we need to advance our goal for ValidDataLength.
|
||
//
|
||
|
||
if ( Bcbs[0]->BeyondLastByte.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart ) {
|
||
|
||
SharedCacheMap->ValidDataGoal = Bcbs[0]->BeyondLastByte;
|
||
}
|
||
|
||
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
||
}
|
||
|
||
DebugTrace(-1, me, "CcSetDirtyPinnedData -> VOID\n", 0 );
|
||
}
|
||
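//
//  For reference, a file system typically reaches CcSetDirtyPinnedData through
//  a pin/modify/unpin sequence like the sketch below, which is excluded from
//  compilation.  It only illustrates the calling pattern; the
//  FlushMetadataUpdate name and its parameters are hypothetical.
//

#if 0

static NTSTATUS
FlushMetadataUpdate (
    IN PFILE_OBJECT FileObject,
    IN PLARGE_INTEGER FileOffset,
    IN ULONG Length,
    IN PLARGE_INTEGER Lsn OPTIONAL
    )
{
    PVOID Bcb;
    PVOID Buffer;

    //
    //  Pin the range, waiting if necessary.
    //

    if (!CcPinRead( FileObject, FileOffset, Length, PIN_WAIT, &Bcb, &Buffer )) {
        return STATUS_UNSUCCESSFUL;
    }

    //
    //  ... modify the pinned buffer here ...
    //

    //
    //  Mark the Bcb dirty so the Lazy Writer will flush it, remembering the
    //  Lsn if the stream has a FlushToLsnRoutine.
    //

    CcSetDirtyPinnedData( Bcb, Lsn );

    //
    //  Drop the pin reference.
    //

    CcUnpinData( Bcb );

    return STATUS_SUCCESS;
}

#endif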
|
||
|
||
NTSTATUS
|
||
CcSetValidData (
|
||
IN PFILE_OBJECT FileObject,
|
||
IN PLARGE_INTEGER ValidDataLength
|
||
)
|
||
|
||
/*++

Routine Description:

    This routine is used to call the File System to update ValidDataLength
    for a file.

Arguments:

    FileObject - A pointer to a referenced file object describing which file
                 is to be updated.

    ValidDataLength - Pointer to new ValidDataLength.

Return Value:

    Status of operation.

--*/
|
||
|
||
{
|
||
PIO_STACK_LOCATION IrpSp;
|
||
PDEVICE_OBJECT DeviceObject;
|
||
NTSTATUS Status;
|
||
FILE_END_OF_FILE_INFORMATION Buffer;
|
||
IO_STATUS_BLOCK IoStatus;
|
||
KEVENT Event;
|
||
PIRP Irp;
|
||
|
||
DebugTrace(+1, me, "CcSetValidData:\n", 0 );
|
||
DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
|
||
DebugTrace2(0, me, " ValidDataLength = %08lx, %08lx\n",
|
||
ValidDataLength->LowPart, ValidDataLength->HighPart );
|
||
|
||
//
|
||
// Copy ValidDataLength to our buffer.
|
||
//
|
||
|
||
Buffer.EndOfFile = *ValidDataLength;
|
||
|
||
//
|
||
// Initialize the event.
|
||
//
|
||
|
||
KeInitializeEvent( &Event, NotificationEvent, FALSE );
|
||
|
||
//
|
||
// Begin by getting a pointer to the device object that the file resides
|
||
// on.
|
||
//
|
||
|
||
DeviceObject = IoGetRelatedDeviceObject( FileObject );
|
||
|
||
//
|
||
// Allocate an I/O Request Packet (IRP) for this in-page operation.
|
||
//
|
||
|
||
Irp = IoAllocateIrp( DeviceObject->StackSize, FALSE );
|
||
if (Irp == NULL) {
|
||
|
||
DebugTrace(-1, me, "CcSetValidData-> STATUS_INSUFFICIENT_RESOURCES\n", 0 );
|
||
|
||
return STATUS_INSUFFICIENT_RESOURCES;
|
||
}
|
||
|
||
//
|
||
// Get a pointer to the first stack location in the packet. This location
|
||
// will be used to pass the function codes and parameters to the first
|
||
// driver.
|
||
//
|
||
|
||
IrpSp = IoGetNextIrpStackLocation( Irp );
|
||
|
||
//
|
||
// Fill in the IRP according to this request, setting the flags to
|
||
// just cause IO to set the event and deallocate the Irp.
|
||
//
|
||
|
||
Irp->Flags = IRP_PAGING_IO | IRP_SYNCHRONOUS_PAGING_IO;
|
||
Irp->RequestorMode = KernelMode;
|
||
Irp->UserIosb = &IoStatus;
|
||
Irp->UserEvent = &Event;
|
||
Irp->Tail.Overlay.OriginalFileObject = FileObject;
|
||
Irp->Tail.Overlay.Thread = PsGetCurrentThread();
|
||
Irp->AssociatedIrp.SystemBuffer = &Buffer;
|
||
|
||
//
|
||
// Fill in the normal read parameters.
|
||
//
|
||
|
||
IrpSp->MajorFunction = IRP_MJ_SET_INFORMATION;
|
||
IrpSp->FileObject = FileObject;
|
||
IrpSp->DeviceObject = DeviceObject;
|
||
IrpSp->Parameters.SetFile.Length = sizeof(FILE_END_OF_FILE_INFORMATION);
|
||
IrpSp->Parameters.SetFile.FileInformationClass = FileEndOfFileInformation;
|
||
IrpSp->Parameters.SetFile.FileObject = NULL;
|
||
IrpSp->Parameters.SetFile.AdvanceOnly = TRUE;
|
||
|
||
//
|
||
// Queue the packet to the appropriate driver based on whether or not there
|
||
// is a VPB associated with the device. This routine should not raise.
|
||
//
|
||
|
||
Status = IoCallDriver( DeviceObject, Irp );
|
||
|
||
//
|
||
// If pending is returned (which is a successful status),
|
||
// we must wait for the request to complete.
|
||
//
|
||
|
||
if (Status == STATUS_PENDING) {
|
||
KeWaitForSingleObject( &Event,
|
||
Executive,
|
||
KernelMode,
|
||
FALSE,
|
||
(PLARGE_INTEGER)NULL);
|
||
}
|
||
|
||
//
|
||
// If we got an error back in Status, then the Iosb
|
||
// was not written, so we will just copy the status
|
||
// there, then test the final status after that.
|
||
//
|
||
|
||
if (!NT_SUCCESS(Status)) {
|
||
IoStatus.Status = Status;
|
||
}
|
||
|
||
DebugTrace(-1, me, "CcSetValidData-> %08lx\n", IoStatus.Status );
|
||
|
||
return IoStatus.Status;
|
||
}
|
||
|
||
|
||
//
|
||
// Internal Support Routine
|
||
//
|
||
|
||
BOOLEAN
|
||
CcAcquireByteRangeForWrite (
|
||
IN PSHARED_CACHE_MAP SharedCacheMap,
|
||
IN PLARGE_INTEGER TargetOffset OPTIONAL,
|
||
IN ULONG TargetLength,
|
||
OUT PLARGE_INTEGER FileOffset,
|
||
OUT PULONG Length,
|
||
OUT PBCB *FirstBcb
|
||
)
|
||
|
||
/*++

Routine Description:

    This routine is called by the Lazy Writer to try to find a contiguous
    range of bytes from the specified SharedCacheMap that are dirty and
    should be flushed.  After flushing, these bytes should be released
    by calling CcReleaseByteRangeFromWrite.

    Dirty ranges are returned in strictly increasing order.

Arguments:

    SharedCacheMap - for the file for which the dirty byte range is sought

    TargetOffset - If specified, then only the specified range is
                   to be flushed.

    TargetLength - If target offset specified, this completes the range.
                   In any case, this field is zero for the Lazy Writer,
                   and nonzero for explicit flush calls.

    FileOffset - Returns the offset for the beginning of the dirty byte
                 range to flush

    Length - Returns the length of bytes in the range.

    FirstBcb - Returns the first Bcb in the list for the range, to be used
               when calling CcReleaseByteRangeFromWrite, or NULL if dirty
               pages were found in the mask Bcb.

Return Value:

    FALSE - if no dirty byte range could be found to match the necessary
            criteria.

    TRUE - if a dirty byte range is being returned.

--*/
|
||
|
||
{
|
||
KLOCK_QUEUE_HANDLE LockHandle;
|
||
PMBCB Mbcb;
|
||
PBCB Bcb;
|
||
LARGE_INTEGER LsnToFlushTo = {0, 0};
|
||
|
||
LOGICAL BcbLookasideCheck = FALSE;
|
||
|
||
PBITMAP_RANGE BitmapRange;
|
||
PULONG EndPtr;
|
||
PULONG MaskPtr;
|
||
ULONG Mask;
|
||
LONGLONG FirstDirtyPage;
|
||
ULONG OriginalFirstDirtyPage;
|
||
LONGLONG LastDirtyPage = MAXLONGLONG;
|
||
|
||
DebugTrace(+1, me, "CcAcquireByteRangeForWrite:\n", 0);
|
||
DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap);
|
||
|
||
//
|
||
// Initially clear outputs.
|
||
//
|
||
|
||
FileOffset->QuadPart = 0;
|
||
*Length = 0;
|
||
|
||
//
|
||
// We must acquire the SharedCacheMap->BcbSpinLock.
|
||
//
|
||
|
||
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
|
||
//
|
||
// See if there is a simple Mask Bcb, and if there is anything dirty in
|
||
// it. If so we will simply handle that case here by processing the bitmap.
|
||
//
|
||
|
||
Mbcb = SharedCacheMap->Mbcb;
|
||
|
||
if ((Mbcb != NULL) &&
|
||
(Mbcb->DirtyPages != 0) &&
|
||
((Mbcb->PagesToWrite != 0) || (TargetLength != 0))) {
|
||
|
||
//
|
||
// If a target range was specified (outside call to CcFlush for a range),
|
||
// then calculate FirstPage and EndPtr based on these inputs.
|
||
//
|
||
|
||
if (ARGUMENT_PRESENT(TargetOffset)) {
|
||
|
||
FirstDirtyPage = TargetOffset->QuadPart >> PAGE_SHIFT;
|
||
LastDirtyPage = (TargetOffset->QuadPart + TargetLength - 1) >> PAGE_SHIFT;
|
||
|
||
//
|
||
// Find the bitmap range containing the first dirty page.
|
||
//
|
||
|
||
BitmapRange = CcFindBitmapRangeToClean( Mbcb, FirstDirtyPage );
|
||
|
||
//
|
||
// If the target range is not dirty, get out. We may have even
|
||
// gotten back a nonoverlapping bitmap range.
|
||
//
|
||
|
||
if ((LastDirtyPage < (BitmapRange->BasePage + BitmapRange->FirstDirtyPage)) ||
|
||
(FirstDirtyPage > (BitmapRange->BasePage + BitmapRange->LastDirtyPage))) {
|
||
|
||
goto Scan_Bcbs;
|
||
}
|
||
|
||
if (LastDirtyPage < (BitmapRange->BasePage + BitmapRange->LastDirtyPage)) {
|
||
EndPtr = &BitmapRange->Bitmap[(ULONG)(LastDirtyPage - BitmapRange->BasePage) / 32];
|
||
} else {
|
||
EndPtr = &BitmapRange->Bitmap[BitmapRange->LastDirtyPage / 32];
|
||
}
|
||
|
||
|
||
//
|
||
// Otherwise, for the Lazy Writer pick up where we left off.
|
||
//
|
||
|
||
} else {
|
||
|
||
//
|
||
// If a length was specified, then it is an explicit flush, and
|
||
// we want to start with the first dirty page, else the Lazy Writer
|
||
// starts from the ResumeWritePage.
|
||
//
|
||
|
||
FirstDirtyPage = 0;
|
||
if (TargetLength == 0) {
|
||
FirstDirtyPage = Mbcb->ResumeWritePage;
|
||
}
|
||
|
||
//
|
||
// Now find the next (cyclic) dirty page from this point.
|
||
//
|
||
|
||
BitmapRange = CcFindBitmapRangeToClean( Mbcb, FirstDirtyPage );
|
||
|
||
//
|
||
// If the page we thought we were looking for is beyond the last dirty page
|
||
// of this range, then CcFindBitmapRangeToClean must have wrapped back to
|
||
// the start of the file, and we should resume on the first dirty page of
|
||
// this range.
|
||
//
|
||
|
||
if (FirstDirtyPage > (BitmapRange->BasePage + BitmapRange->LastDirtyPage)) {
|
||
FirstDirtyPage = BitmapRange->BasePage + BitmapRange->FirstDirtyPage;
|
||
}
|
||
|
||
EndPtr = &BitmapRange->Bitmap[BitmapRange->LastDirtyPage / 32];
|
||
}
|
||
|
||
//
|
||
// Now we can skip over any clean pages.
|
||
//
|
||
|
||
if (FirstDirtyPage < (BitmapRange->BasePage + BitmapRange->FirstDirtyPage)) {
|
||
FirstDirtyPage = BitmapRange->BasePage + BitmapRange->FirstDirtyPage;
|
||
}
|
||
|
||
//
|
||
// Form a few other inputs for our dirty page scan.
|
||
//
|
||
|
||
MaskPtr = &BitmapRange->Bitmap[(ULONG)(FirstDirtyPage - BitmapRange->BasePage) / 32];
|
||
Mask = (ULONG)(-1 << (FirstDirtyPage % 32));
|
||
OriginalFirstDirtyPage = (ULONG)(FirstDirtyPage - BitmapRange->BasePage);
|
||
|
||
//
|
||
// Because of the possibility of getting stuck on a "hot spot" which gets
|
||
// modified over and over, we want to be very careful to resume exactly
|
||
// at the recorded resume point. If there is nothing there, then we
|
||
        // fall into the loop below to scan for nonzero long words in the bitmap,
|
||
// starting at the next longword.
|
||
//
|
||
|
||
if ((*MaskPtr & Mask) == 0) {
|
||
|
||
//
|
||
// Before entering loop, set all mask bits and insure we increment from
|
||
// an even Ulong boundary.
|
||
//
|
||
|
||
Mask = MAXULONG;
|
||
FirstDirtyPage &= ~31;
|
||
|
||
//
|
||
// To scan the bitmap faster, we scan for entire long words which are
|
||
// nonzero.
|
||
//
|
||
|
||
do {
|
||
|
||
MaskPtr += 1;
|
||
FirstDirtyPage += 32;
|
||
|
||
//
|
||
// If we go beyond the end, then we must wrap back to the first
|
||
// dirty page. We will just go back to the start of the first
|
||
// longword.
|
||
//
|
||
|
||
if (MaskPtr > EndPtr) {
|
||
|
||
//
|
||
// We can backup the last dirty page hint to where we
|
||
// started scanning, if we are the lazy writer.
|
||
//
|
||
|
||
if (TargetLength == 0) {
|
||
ASSERT(OriginalFirstDirtyPage >= BitmapRange->FirstDirtyPage);
|
||
BitmapRange->LastDirtyPage = OriginalFirstDirtyPage - 1;
|
||
}
|
||
|
||
//
|
||
// We hit the end of our scan. Let's assume we are supposed
|
||
// to move on to the next range with dirty pages.
|
||
//
|
||
|
||
do {
|
||
|
||
//
|
||
// Go to the next range.
|
||
//
|
||
|
||
BitmapRange = (PBITMAP_RANGE)BitmapRange->Links.Flink;
|
||
|
||
//
|
||
// Did we hit the listhead?
|
||
//
|
||
|
||
if (BitmapRange == (PBITMAP_RANGE)&Mbcb->BitmapRanges) {
|
||
|
||
//
|
||
// If this is an explicit flush, then it is time to
|
||
// get out.
|
||
//
|
||
|
||
if (TargetLength != 0) {
|
||
goto Scan_Bcbs;
|
||
}
|
||
|
||
//
|
||
// Otherwise, we must wrap back to the first range in the
|
||
// Lazy Writer Scan.
|
||
//
|
||
|
||
BitmapRange = (PBITMAP_RANGE)BitmapRange->Links.Flink;
|
||
}
|
||
|
||
} while (BitmapRange->DirtyPages == 0);
|
||
|
||
//
|
||
// Now we have a new range with dirty pages, but if this is
|
||
// an explicit flush of a specified range, we may be done.
|
||
//
|
||
|
||
if ((LastDirtyPage < (BitmapRange->BasePage + BitmapRange->FirstDirtyPage)) ||
|
||
(FirstDirtyPage > (BitmapRange->BasePage + BitmapRange->LastDirtyPage))) {
|
||
|
||
goto Scan_Bcbs;
|
||
}
|
||
|
||
//
|
||
// Otherwise, we need to set up our context to resume scanning in this
|
||
// range.
|
||
//
|
||
|
||
MaskPtr = &BitmapRange->Bitmap[BitmapRange->FirstDirtyPage / 32];
|
||
EndPtr = &BitmapRange->Bitmap[BitmapRange->LastDirtyPage / 32];
|
||
FirstDirtyPage = BitmapRange->BasePage + (BitmapRange->FirstDirtyPage & ~31);
|
||
OriginalFirstDirtyPage = BitmapRange->FirstDirtyPage;
|
||
}
|
||
} while (*MaskPtr == 0);
|
||
}
|
||
|
||
//
|
||
// Calculate the first set bit in the mask that we hit on.
|
||
//
|
||
|
||
Mask = ~Mask + 1;
|
||
|
||
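        //
        // For illustration: if the resume point was at bit 8, Mask currently
        // looks like 0xFFFFFF00, and ~Mask + 1 (two's complement negation)
        // yields 0x00000100, i.e. just the lowest allowed bit.  The loop below
        // then shifts that single bit upward until it lands on a set bit in
        // *MaskPtr.
        //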
//
|
||
// Now loop to find the first set bit.
|
||
//
|
||
|
||
while ((*MaskPtr & Mask) == 0) {
|
||
|
||
Mask <<= 1;
|
||
FirstDirtyPage += 1;
|
||
}
|
||
|
||
//
|
||
// If a TargetOffset was specified, then make sure we do not start
|
||
// beyond the specified range or a dirty Bcb in the range.
|
||
//
|
||
|
||
if (ARGUMENT_PRESENT(TargetOffset)) {
|
||
|
||
if (FirstDirtyPage >= ((TargetOffset->QuadPart + TargetLength + PAGE_SIZE - 1) >> PAGE_SHIFT)) {
|
||
|
||
goto Scan_Bcbs;
|
||
}
|
||
|
||
//
|
||
// If Bcbs are present on this file, we must go scan to see if they
|
||
// describe a range that must be written first. If this is not the
|
||
// case, we'll hop back and continue building the range from the mask Bcb.
|
||
//
|
||
// Note that this case will be very rare. Bcbs are introduced into user
|
||
// files in limited situations (CcZero) and the reverse is never allowed
|
||
// to happen.
|
||
//
|
||
|
||
if (!IsListEmpty(&SharedCacheMap->BcbList)) {
|
||
|
||
BcbLookasideCheck = TRUE;
|
||
goto Scan_Bcbs;
|
||
}
|
||
}
|
||
|
||
Accept_Page:
|
||
|
||
//
|
||
// Now loop to count the set bits at that point, clearing them as we
|
||
// go because we plan to write the corresponding pages. Stop as soon
|
||
// as we find a clean page, or we reach our maximum write size. Of
|
||
// course we want to ignore long word boundaries and keep trying to
|
||
// extend the write. We do not check for wrapping around the end of
|
||
// the bitmap here, because we guarantee some zero bits at the end
|
||
// in CcSetDirtyInMask.
|
||
//
|
||
|
||
while (((*MaskPtr & Mask) != 0) && (*Length < (MAX_WRITE_BEHIND / PAGE_SIZE)) &&
|
||
(!ARGUMENT_PRESENT(TargetOffset) || ((FirstDirtyPage + *Length) <
|
||
(ULONG)((TargetOffset->QuadPart + TargetLength + PAGE_SIZE - 1) >> PAGE_SHIFT)))) {
|
||
|
||
ASSERT(MaskPtr <= (&BitmapRange->Bitmap[BitmapRange->LastDirtyPage / 32]));
|
||
|
||
*MaskPtr -= Mask;
|
||
*Length += 1;
|
||
Mask <<= 1;
|
||
|
||
if (Mask == 0) {
|
||
|
||
MaskPtr += 1;
|
||
Mask = 1;
|
||
|
||
if (MaskPtr > EndPtr) {
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
//
|
||
// Now reduce the count of pages we were supposed to write this time,
|
||
// possibly clearing this count.
|
||
//
|
||
|
||
if (*Length < Mbcb->PagesToWrite) {
|
||
|
||
Mbcb->PagesToWrite -= *Length;
|
||
|
||
} else {
|
||
|
||
Mbcb->PagesToWrite = 0;
|
||
}
|
||
|
||
//
|
||
// Reduce the dirty page counts by the number of pages we just cleared.
|
||
//
|
||
|
||
ASSERT(Mbcb->DirtyPages >= *Length);
|
||
Mbcb->DirtyPages -= *Length;
|
||
BitmapRange->DirtyPages -= *Length;
|
||
|
||
CcAcquireMasterLockAtDpcLevel();
|
||
CcDeductDirtyPages( SharedCacheMap, *Length );
|
||
|
||
//
|
||
// Normally we need to reduce CcPagesYetToWrite appropriately.
|
||
//
|
||
|
||
if (CcPagesYetToWrite > *Length) {
|
||
CcPagesYetToWrite -= *Length;
|
||
} else {
|
||
CcPagesYetToWrite = 0;
|
||
}
|
||
|
||
//
|
||
// If we took out the last dirty page, then move the SharedCacheMap
|
||
// back to the clean list.
|
||
//
|
||
|
||
if (SharedCacheMap->DirtyPages == 0) {
|
||
|
||
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
||
InsertTailList( &CcCleanSharedCacheMapList,
|
||
&SharedCacheMap->SharedCacheMapLinks );
|
||
}
|
||
CcReleaseMasterLockFromDpcLevel();
|
||
|
||
//
|
||
// If the number of dirty pages for the Mbcb went to zero, we can reset
|
||
// our hint fields now.
|
||
//
|
||
|
||
if (BitmapRange->DirtyPages == 0) {
|
||
|
||
BitmapRange->FirstDirtyPage = MAXULONG;
|
||
BitmapRange->LastDirtyPage = 0;
|
||
|
||
//
|
||
// Assume this is a large file and that the resume point should
|
||
// be at the beginning of the next range. In all cases if the resume
|
||
// point is set too high, the next resume will just wrap back to 0 anyway.
|
||
//
|
||
|
||
Mbcb->ResumeWritePage = BitmapRange->BasePage + (MBCB_BITMAP_BLOCK_SIZE * 8);
|
||
|
||
//
|
||
// Otherwise we have to update the hint fields.
|
||
//
|
||
|
||
} else {
|
||
|
||
//
|
||
// Advance the first dirty page hint if we can.
|
||
//
|
||
|
||
if (BitmapRange->FirstDirtyPage == OriginalFirstDirtyPage) {
|
||
|
||
BitmapRange->FirstDirtyPage = (ULONG)(FirstDirtyPage - BitmapRange->BasePage) + *Length;
|
||
}
|
||
|
||
//
|
||
// Set to resume the next scan at the next bit for
|
||
// the Lazy Writer.
|
||
//
|
||
|
||
if (TargetLength == 0) {
|
||
|
||
Mbcb->ResumeWritePage = FirstDirtyPage + *Length;
|
||
}
|
||
}
|
||
|
||
//
|
||
// We can save a callback by letting our caller know when
|
||
// we have no more pages to write.
|
||
//
|
||
|
||
if (IsListEmpty(&SharedCacheMap->BcbList)) {
|
||
SharedCacheMap->PagesToWrite = Mbcb->PagesToWrite;
|
||
}
|
||
|
||
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
||
|
||
//
|
||
// Now form all of our outputs. We calculated *Length as a page count,
|
||
// but our caller wants it in bytes.
|
||
//
|
||
|
||
*Length <<= PAGE_SHIFT;
|
||
FileOffset->QuadPart = (LONGLONG)FirstDirtyPage << PAGE_SHIFT;
|
||
*FirstBcb = NULL;
|
||
|
||
DebugTrace2(0, me, " <FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
||
FileOffset->HighPart );
|
||
DebugTrace( 0, me, " <Length = %08lx\n", *Length );
|
||
DebugTrace(-1, me, "CcAcquireByteRangeForWrite -> TRUE\n", 0 );
|
||
|
||
return TRUE;
|
||
}
|
||
|
||
//
|
||
// We get here if there is no Mbcb or no dirty pages in it. Note that we
|
||
// wouldn't even be here if there were no dirty pages in this SharedCacheMap.
|
||
//
|
||
|
||
//
|
||
// Now point to last Bcb in List, and loop until we hit one of the
|
||
// breaks below or the beginning of the list.
|
||
//
|
||
|
||
Scan_Bcbs:
|
||
|
||
//
|
||
// Use while TRUE to handle case where the current target range wraps
|
||
// (escape is at the bottom).
|
||
//
|
||
|
||
while (TRUE) {
|
||
|
||
Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Blink, BCB, BcbLinks );
|
||
|
||
//
|
||
// If we are to resume from a nonzero FileOffset, call CcFindBcb
|
||
// to get a quicker start. This is only useful on files that make
|
||
// use of significant pinned access, of course.
|
||
//
|
||
|
||
if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
|
||
|
||
PLARGE_INTEGER StartingOffset;
|
||
|
||
if (ARGUMENT_PRESENT(TargetOffset)) {
|
||
StartingOffset = TargetOffset;
|
||
} else {
|
||
StartingOffset = (PLARGE_INTEGER)&SharedCacheMap->BeyondLastFlush;
|
||
}
|
||
|
||
if (StartingOffset->QuadPart != 0) {
|
||
|
||
LARGE_INTEGER StartingOffsetBias;
|
||
|
||
StartingOffsetBias.QuadPart = StartingOffset->QuadPart + PAGE_SIZE;
|
||
|
||
//
|
||
// Position ourselves. If we did not find a Bcb for the page, then
|
||
// a lower FileOffset was returned, so we want to move forward one.
|
||
//
|
||
|
||
if (!CcFindBcb( SharedCacheMap,
|
||
StartingOffset,
|
||
&StartingOffsetBias,
|
||
&Bcb )) {
|
||
Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
|
||
}
|
||
}
|
||
}
|
||
|
||
while (&Bcb->BcbLinks != &SharedCacheMap->BcbList) {
|
||
|
||
//
|
||
// Skip over this item if it is a listhead.
|
||
//
|
||
|
||
if (Bcb->NodeTypeCode != CACHE_NTC_BCB) {
|
||
|
||
Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
|
||
continue;
|
||
}
|
||
|
||
//
|
||
// If we are doing a specified range, then get out if we hit a
|
||
// higher Bcb.
|
||
//
|
||
|
||
if (ARGUMENT_PRESENT(TargetOffset) &&
|
||
((TargetOffset->QuadPart + TargetLength) <= Bcb->FileOffset.QuadPart)) {
|
||
|
||
break;
|
||
}
|
||
|
||
//
|
||
// If we have not started a run, then see if this Bcb is a candidate
|
||
// to start one.
|
||
//
|
||
|
||
if (*Length == 0) {
|
||
|
||
//
|
||
// Else see if the Bcb is dirty, and is in our specified range, if
|
||
// there is one.
|
||
//
|
||
|
||
if (!Bcb->Dirty ||
|
||
(ARGUMENT_PRESENT(TargetOffset) && (TargetOffset->QuadPart >= Bcb->BeyondLastByte.QuadPart)) ||
|
||
(!ARGUMENT_PRESENT(TargetOffset) && (Bcb->FileOffset.QuadPart < SharedCacheMap->BeyondLastFlush))) {
|
||
|
||
Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
|
||
continue;
|
||
|
||
}
|
||
|
||
//
|
||
// If we have a candidate dirty page from the mask Bcb, see
|
||
// if it describes a prior range. We must decide to return
|
||
// the first dirty range.
|
||
//
|
||
|
||
if (BcbLookasideCheck && FirstDirtyPage <= (ULONG)(Bcb->FileOffset.QuadPart >> PAGE_SHIFT)) {
|
||
goto Accept_Page;
|
||
}
|
||
}
|
||
|
||
//
|
||
// Else, if we have started a run, then if this guy cannot be
|
||
// appended to the run, then break. Note that we ignore the
|
||
// Bcb's modification time stamp here to simplify the test.
|
||
//
|
||
// If the Bcb is currently pinned, then there is no sense in causing
|
||
// contention, so we will skip over this guy as well.
|
||
//
|
||
// Finally, if the new Bcb is in the next Vacb level, we will skip it
|
||
// to avoid problems with Bcb listheads going away in the middle of
|
||
// CcReleaseByteRangeFromWrite.
|
||
//
|
||
|
||
else {
|
||
if (!Bcb->Dirty || ( Bcb->FileOffset.QuadPart != ( FileOffset->QuadPart + (LONGLONG)*Length)) ||
|
||
(*Length + Bcb->ByteLength > MAX_WRITE_BEHIND) ||
|
||
(Bcb->PinCount != 0) ||
|
||
((Bcb->FileOffset.QuadPart & (VACB_SIZE_OF_FIRST_LEVEL - 1)) == 0)) {
|
||
|
||
break;
|
||
}
|
||
}
|
||
|
||
//
|
||
// Increment PinCount to prevent Bcb from going away once the
|
||
// SpinLock is released, or we set it clean for the case where
|
||
// modified write is allowed.
|
||
//
|
||
|
||
Bcb->PinCount += 1;
|
||
|
||
//
|
||
// Release the SpinLock before waiting on the resource.
|
||
//
|
||
|
||
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
||
|
||
if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) &&
|
||
!FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) {
|
||
|
||
//
|
||
// Now acquire the Bcb exclusive, so that we know that nobody
|
||
// has it pinned and thus no one can be modifying the described
|
||
// buffer. To acquire the first Bcb in a run, we can afford
|
||
// to wait, because we are not holding any resources. However
|
||
// if we already have a Bcb, then we better not wait, because
|
||
// someone could have this Bcb pinned, and then wait for the
|
||
// Bcb we already have exclusive.
|
||
//
|
||
// For streams for which we have not disabled modified page
|
||
// writing, we do not need to acquire this resource, and the
|
||
// foreground processing will not be acquiring the Bcb either.
|
||
//
|
||
|
||
if (!ExAcquireResourceExclusiveLite( &Bcb->Resource,
|
||
(BOOLEAN)(*Length == 0) )) {
|
||
|
||
DebugTrace( 0, me, "Could not acquire 2nd Bcb\n", 0 );
|
||
|
||
//
|
||
// Release the Bcb count we took out above. We say
|
||
// ReadOnly = TRUE since we do not own the resource,
|
||
                // and SetClean = FALSE because we just want to decrement
|
||
// the count.
|
||
//
|
||
|
||
CcUnpinFileData( Bcb, TRUE, UNPIN );
|
||
|
||
//
|
||
// When we leave the loop, we have to have the spin lock
|
||
//
|
||
|
||
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
break;
|
||
}
|
||
|
||
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
|
||
//
|
||
// If someone has the file open WriteThrough, then the Bcb may no
|
||
// longer be dirty. If so, call CcUnpinFileData to decrement the
|
||
// PinCount we incremented and free the resource.
|
||
//
|
||
|
||
if (!Bcb->Dirty) {
|
||
|
||
//
|
||
// Release the spinlock so that we can call CcUnpinFileData
|
||
//
|
||
|
||
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
||
|
||
CcUnpinFileData( Bcb, FALSE, UNPIN );
|
||
|
||
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
|
||
//
|
||
// Now if we already have some data we can just break to return
|
||
// it, otherwise we have to restart the scan, since our Bcb
|
||
// may have gone away.
|
||
//
|
||
|
||
if (*Length != 0) {
|
||
break;
|
||
}
|
||
else {
|
||
|
||
Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Blink, BCB, BcbLinks );
|
||
continue;
|
||
}
|
||
}
|
||
|
||
//
|
||
// If we are not in the disable modified write mode (normal user data)
|
||
// then we must set the buffer clean before doing the write, since we
|
||
// are unsynchronized with anyone producing dirty data. That way if we,
|
||
// for example, are writing data out while it is actively being changed,
|
||
// at least the changer will mark the buffer dirty afterwards and cause
|
||
// us to write it again later.
|
||
//
|
||
|
||
} else {
|
||
|
||
CcUnpinFileData( Bcb, TRUE, SET_CLEAN );
|
||
|
||
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
}
|
||
|
||
DebugTrace( 0, me, "Adding Bcb = %08lx to run\n", Bcb );
|
||
|
||
//
|
||
// No matter what, once we've reached this point we are returning
|
||
// a range from the Bcbs.
|
||
//
|
||
|
||
BcbLookasideCheck = FALSE;
|
||
|
||
//
|
||
// Update all of our return values. Note that FirstBcb refers to the
|
||
// FirstBcb in terms of how the Bcb list is ordered. Since the Bcb list
|
||
// is ordered by descending file offsets, FirstBcb will actually return
|
||
// the Bcb with the highest FileOffset.
|
||
//
|
||
|
||
if (*Length == 0) {
|
||
*FileOffset = Bcb->FileOffset;
|
||
}
|
||
*FirstBcb = Bcb;
|
||
*Length += Bcb->ByteLength;
|
||
|
||
//
|
||
// If there is a log file flush callback for this stream, then we must
|
||
// remember the largest Lsn we are about to flush.
|
||
//
|
||
|
||
if ((SharedCacheMap->FlushToLsnRoutine != NULL) &&
|
||
(Bcb->NewestLsn.QuadPart > LsnToFlushTo.QuadPart)) {
|
||
|
||
LsnToFlushTo = Bcb->NewestLsn;
|
||
}
|
||
|
||
Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
|
||
}
|
||
|
||
//
|
||
// If we have a candidate dirty page from the mask Bcb, accept it
|
||
// since no Bcb has been found.
|
||
//
|
||
|
||
if (BcbLookasideCheck) {
|
||
|
||
ASSERT( *Length == 0 );
|
||
goto Accept_Page;
|
||
}
|
||
|
||
//
|
||
// If we found something, update our last flush range and reduce
|
||
// PagesToWrite.
|
||
//
|
||
|
||
if (*Length != 0) {
|
||
|
||
//
|
||
// If this is the Lazy Writer, then update BeyondLastFlush and
|
||
// the PagesToWrite target.
|
||
//
|
||
|
||
if (!ARGUMENT_PRESENT(TargetOffset)) {
|
||
|
||
SharedCacheMap->BeyondLastFlush = FileOffset->QuadPart + *Length;
|
||
|
||
if (SharedCacheMap->PagesToWrite > (*Length >> PAGE_SHIFT)) {
|
||
SharedCacheMap->PagesToWrite -= (*Length >> PAGE_SHIFT);
|
||
} else {
|
||
SharedCacheMap->PagesToWrite = 0;
|
||
}
|
||
}
|
||
|
||
break;
|
||
|
||
//
|
||
// Else, if we scanned the entire file, get out - nothing to write now.
|
||
//
|
||
|
||
} else if ((SharedCacheMap->BeyondLastFlush == 0) || ARGUMENT_PRESENT(TargetOffset)) {
|
||
break;
|
||
}
|
||
|
||
//
|
||
// Otherwise, we may have not found anything because there is nothing
|
||
// beyond the last flush. In that case it is time to wrap back to 0
|
||
// and keep scanning.
|
||
//
|
||
|
||
SharedCacheMap->BeyondLastFlush = 0;
|
||
}
|
||
|
||
//
|
||
// Now release the spinlock file while we go off and do the I/O
|
||
//
|
||
|
||
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
||
|
||
//
|
||
// If we need to flush to some Lsn, this is the time to do it now
|
||
// that we have found the largest Lsn and freed the spin lock.
|
||
//
|
||
|
||
if (LsnToFlushTo.QuadPart != 0) {
|
||
|
||
try {
|
||
|
||
(*SharedCacheMap->FlushToLsnRoutine) ( SharedCacheMap->LogHandle,
|
||
LsnToFlushTo );
|
||
} except( CcExceptionFilter( GetExceptionCode() )) {
|
||
|
||
//
|
||
// If there was an error, it will be raised. We cannot
|
||
// write anything until we successfully flush the log
|
||
// file, so we will release everything here and just
|
||
// return with 0 bytes.
|
||
//
|
||
|
||
LARGE_INTEGER LastOffset;
|
||
PBCB NextBcb;
|
||
|
||
//
|
||
// Now loop to free up all of the Bcbs. Set the time
|
||
// stamps to 0, so that we are guaranteed to try to
|
||
// flush them again on the next sweep.
|
||
//
|
||
|
||
do {
|
||
NextBcb = CONTAINING_RECORD( (*FirstBcb)->BcbLinks.Flink, BCB, BcbLinks );
|
||
|
||
//
|
||
// Skip over any listheads.
|
||
//
|
||
|
||
if ((*FirstBcb)->NodeTypeCode == CACHE_NTC_BCB) {
|
||
|
||
LastOffset = (*FirstBcb)->FileOffset;
|
||
|
||
CcUnpinFileData( *FirstBcb,
|
||
BooleanFlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND),
|
||
UNPIN );
|
||
}
|
||
|
||
*FirstBcb = NextBcb;
|
||
} while (FileOffset->QuadPart != LastOffset.QuadPart);
|
||
|
||
//
|
||
// Show we did not acquire anything.
|
||
//
|
||
|
||
*Length = 0;
|
||
}
|
||
}
|
||
|
||
//
|
||
// If we got anything, return TRUE.
|
||
//
|
||
|
||
DebugTrace2(0, me, " <FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
||
FileOffset->HighPart );
|
||
DebugTrace( 0, me, " <Length = %08lx\n", *Length );
|
||
DebugTrace(-1, me, "CcAcquireByteRangeForWrite -> %02lx\n", *Length != 0 );
|
||
|
||
return ((BOOLEAN)(*Length != 0));
|
||
}
|
||
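//
//  Illustrative sketch, excluded from compilation, of how a flushing loop
//  drives the two routines above: repeatedly acquire the next dirty range,
//  write it, and release it, telling the release routine whether a
//  verify-required error forced the range to be left dirty.  The
//  SketchFlushDirtyRanges name and the WriteRangeToDisk helper are
//  hypothetical placeholders; the real callers are the Lazy Writer and
//  CcFlushCache paths.
//

#if 0

static VOID
SketchFlushDirtyRanges (
    IN PSHARED_CACHE_MAP SharedCacheMap
    )
{
    LARGE_INTEGER FileOffset;
    ULONG Length;
    PBCB FirstBcb;
    NTSTATUS Status;

    while (CcAcquireByteRangeForWrite( SharedCacheMap,
                                       NULL,                //  Lazy-Writer-style scan
                                       0,
                                       &FileOffset,
                                       &Length,
                                       &FirstBcb )) {

        //
        //  Hypothetical stand-in for the actual paging write of the range.
        //

        Status = WriteRangeToDisk( SharedCacheMap, &FileOffset, Length );

        //
        //  Release the range; if the device wants a verify, leave the data
        //  marked dirty so it is retried on a later pass.
        //

        CcReleaseByteRangeFromWrite( SharedCacheMap,
                                     &FileOffset,
                                     Length,
                                     FirstBcb,
                                     (BOOLEAN)(Status == STATUS_VERIFY_REQUIRED) );
    }
}

#endif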
|
||
|
||
//
|
||
// Internal Support Routine
|
||
//
|
||
|
||
VOID
|
||
CcReleaseByteRangeFromWrite (
|
||
IN PSHARED_CACHE_MAP SharedCacheMap,
|
||
IN PLARGE_INTEGER FileOffset,
|
||
IN ULONG Length,
|
||
IN PBCB FirstBcb,
|
||
IN BOOLEAN VerifyRequired
|
||
)
|
||
|
||
/*++

Routine Description:

    This routine is called by the Lazy Writer to free a range of bytes and
    clear all dirty bits, for a byte range returned by CcAcquireByteRangeForWrite.

Arguments:

    SharedCacheMap - As supplied to CcAcquireByteRangeForWrite

    FileOffset - As returned from CcAcquireByteRangeForWrite

    Length - As returned from CcAcquireByteRangeForWrite

    FirstBcb - As returned from CcAcquireByteRangeForWrite

    VerifyRequired - supplied as TRUE if a verify required error was received.
                     In this case we must mark/leave the data dirty so that
                     we will try to write it again.

Return Value:

    None

--*/
|
||
|
||
{
|
||
LARGE_INTEGER LastOffset;
|
||
PBCB NextBcb;
|
||
|
||
DebugTrace(+1, me, "CcReleaseByteRangeFromWrite:\n", 0);
|
||
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
||
FileOffset->HighPart );
|
||
|
||
//
|
||
// If it is a mask Mbcb we are getting, then we only have to check
|
||
// for VerifyRequired.
|
||
//
|
||
|
||
if (FirstBcb == NULL) {
|
||
|
||
ASSERT(Length != 0);
|
||
|
||
if (VerifyRequired) {
|
||
CcSetDirtyInMask( SharedCacheMap, FileOffset, Length );
|
||
}
|
||
|
||
DebugTrace(-1, me, "CcReleaseByteRangeFromWrite -> VOID\n", 0);
|
||
|
||
return;
|
||
}
|
||
|
||
//
|
||
// PREfix correctly notes that if the caller gives us a listhead to start with,
|
||
// we will not have filled in LastOffset by the time we do our first loop test.
|
||
// For PREfix's benefit (and ours), assert we really are starting with a Bcb.
|
||
//
|
||
|
||
ASSERT( FirstBcb->NodeTypeCode == CACHE_NTC_BCB );
|
||
|
||
//
|
||
// Now loop to free up all of the Bcbs. If modified writing is disabled
|
||
// for each Bcb, then we are to set it clean here, since we are synchronized
|
||
// with callers who set the data dirty. Otherwise we only have the Bcb pinned
|
||
// so it will not go away, and we only unpin it here.
|
||
//
|
||
|
||
do {
|
||
NextBcb = CONTAINING_RECORD( FirstBcb->BcbLinks.Flink, BCB, BcbLinks );
|
||
|
||
//
|
||
// Skip over any listheads.
|
||
//
|
||
|
||
if (FirstBcb->NodeTypeCode == CACHE_NTC_BCB) {
|
||
|
||
LastOffset = FirstBcb->FileOffset;
|
||
|
||
//
|
||
// If this is file system metadata (we disabled modified writing),
|
||
// then this is the time to mark the buffer clean, so long as we
|
||
// did not get verify required.
|
||
//
|
||
|
||
if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
|
||
|
||
CcUnpinFileData( FirstBcb,
|
||
BooleanFlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND),
|
||
SET_CLEAN );
|
||
}
|
||
|
||
//
|
||
// If we got verify required, we have to mark the buffer dirty again
|
||
// so we will try again later. Note we have to make this call again
|
||
// to make sure the right thing happens with time stamps.
|
||
//
|
||
|
||
if (VerifyRequired) {
|
||
CcSetDirtyPinnedData( FirstBcb, NULL );
|
||
}
|
||
|
||
//
|
||
// Finally remove a pin count left over from CcAcquireByteRangeForWrite.
|
||
//
|
||
|
||
CcUnpinFileData( FirstBcb, TRUE, UNPIN );
|
||
}
|
||
|
||
FirstBcb = NextBcb;
|
||
} while (FileOffset->QuadPart != LastOffset.QuadPart);
|
||
|
||
DebugTrace(-1, me, "CcReleaseByteRangeFromWrite -> VOID\n", 0);
|
||
}
|
||
|
||
|
||
//
|
||
// Internal Support Routine
|
||
//
|
||
|
||
VOID
|
||
FASTCALL
|
||
CcWriteBehind (
|
||
IN PSHARED_CACHE_MAP SharedCacheMap,
|
||
IN PIO_STATUS_BLOCK IoStatus
|
||
)
|
||
|
||
/*++

Routine Description:

    This routine may be called with Wait = FALSE to see if write behind
    is required, or with Wait = TRUE to perform write behind as required.

    The code is very similar to the code that the Lazy Writer performs
    for each SharedCacheMap.  The main difference is in the call to
    CcAcquireByteRangeForWrite.  Write Behind does not care about time
    stamps (passing ULONG to accept all time stamps), but it will never
    dump the first (highest byte offset) buffer in the list if the last
    byte of that buffer is not yet written.  The Lazy Writer does exactly
    the opposite, in the sense that it is totally time-driven, and will
    even dump a partially modified buffer if it sits around long enough.

Arguments:

    SharedCacheMap - Pointer to SharedCacheMap to be written

Return Value:

    FALSE - if write behind is required, but the caller supplied
            Wait = FALSE

    TRUE - if write behind is complete or not required

--*/
|
||
|
||
{
|
||
KLOCK_QUEUE_HANDLE LockHandle;
|
||
ULONG ActivePage;
|
||
ULONG PageIsDirty;
|
||
PMBCB Mbcb;
|
||
NTSTATUS Status;
|
||
PVACB ActiveVacb = NULL;
|
||
|
||
DebugTrace(+1, me, "CcWriteBehind\n", 0 );
|
||
DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap );
|
||
|
||
//
|
||
// First we have to acquire the file for LazyWrite, to avoid
|
||
// deadlocking with writers to the file. We do this via the
|
||
// CallBack procedure specified to CcInitializeCacheMap.
|
||
//
|
||
|
||
if (!(*SharedCacheMap->Callbacks->AcquireForLazyWrite)
|
||
( SharedCacheMap->LazyWriteContext, TRUE )) {
|
||
|
||
//
|
||
// The filesystem is hinting that it doesn't think that it can
|
||
// service the write without significant delay so we will defer
|
||
// and come back later. Simply drop the queued flag ... note that
|
||
// we do not modify CcPagesYetToWrite, in the hope that we can make
|
||
// up the difference in some other cache map on this pass.
|
||
//
|
||
|
||
CcAcquireMasterLock( &LockHandle.OldIrql );
|
||
ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED);
|
||
CcReleaseMasterLock( LockHandle.OldIrql );
|
||
|
||
IoStatus->Status = STATUS_FILE_LOCK_CONFLICT;
|
||
return;
|
||
}
|
||
|
||
//
|
||
// See if there is a previous active page to clean up, but only
|
||
// do so now if it is the last dirty page or no users have the
|
||
// file open. We will free it below after dropping the spinlock.
|
||
//
|
||
|
||
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
CcAcquireMasterLockAtDpcLevel();
|
||
|
||
if ((SharedCacheMap->DirtyPages <= 1) || (SharedCacheMap->OpenCount == 0)) {
|
||
GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
|
||
}
|
||
|
||
//
|
||
// Increment open count so that our caller's views stay available
|
||
// for CcGetVacbMiss. We could be tying up all of the views, and
|
||
// still need to write file sizes.
|
||
//
|
||
|
||
CcIncrementOpenCount( SharedCacheMap, 'brWS' );
|
||
|
||
//
|
||
// If there is a mask bcb, then we need to establish a target for
|
||
// it to flush.
|
||
//
|
||
|
||
if ((Mbcb = SharedCacheMap->Mbcb) != 0) {
|
||
|
||
//
|
||
// Set a target of pages to write, assuming that any Active
|
||
// Vacb will increase the number.
|
||
//
|
||
|
||
Mbcb->PagesToWrite = Mbcb->DirtyPages + ((ActiveVacb != NULL) ? 1 : 0);
|
||
|
||
if (Mbcb->PagesToWrite > CcPagesYetToWrite) {
|
||
|
||
Mbcb->PagesToWrite = CcPagesYetToWrite;
|
||
}
|
||
}
|
||
|
||
CcReleaseMasterLockFromDpcLevel();
|
||
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
||
|
||
//
|
||
// Now free the active Vacb, if we found one.
|
||
//
|
||
|
||
if (ActiveVacb != NULL) {
|
||
|
||
CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
|
||
}
|
||
|
||
//
|
||
// Now perform the lazy writing for this file via a special call
|
||
// to CcFlushCache. He recognizes us by the &CcNoDelay input to
|
||
// FileOffset, which signifies a Lazy Write, but is subsequently
|
||
// ignored.
|
||
//
|
||
|
||
CcFlushCache( SharedCacheMap->FileObject->SectionObjectPointer,
|
||
&CcNoDelay,
|
||
1,
|
||
IoStatus );
|
||
|
||
//
|
||
// No need for the Lazy Write resource now.
|
||
//
|
||
|
||
(*SharedCacheMap->Callbacks->ReleaseFromLazyWrite)
|
||
( SharedCacheMap->LazyWriteContext );
|
||
|
||
//
|
||
// Check if we need to put up a popup.
|
||
//
|
||
|
||
if (!NT_SUCCESS(IoStatus->Status) && !RetryError(IoStatus->Status)) {
|
||
|
||
//
|
||
// We lost writebehind data. Bemoan our fate into the system event
|
||
// log and throw a popup with a meaningful name to the desktop.
|
||
//
|
||
|
||
POBJECT_NAME_INFORMATION FileNameInfo = NULL;
|
||
NTSTATUS Status;
|
||
|
||
//
|
||
// Increment the count of how many of these we've had. This counter
|
||
// is useful in attempting to discriminate some corruption cases under
|
||
// test.
|
||
//
|
||
|
||
CcLostDelayedWrites += 1;
|
||
|
||
Status = IoQueryFileDosDeviceName( SharedCacheMap->FileObject, &FileNameInfo );
|
||
|
||
if ( Status == STATUS_SUCCESS ) {
|
||
IoRaiseInformationalHardError( STATUS_LOST_WRITEBEHIND_DATA, &FileNameInfo->Name, NULL );
|
||
|
||
} else {
|
||
if ( SharedCacheMap->FileObject->FileName.Length &&
|
||
SharedCacheMap->FileObject->FileName.MaximumLength &&
|
||
SharedCacheMap->FileObject->FileName.Buffer ) {
|
||
|
||
IoRaiseInformationalHardError( STATUS_LOST_WRITEBEHIND_DATA, &SharedCacheMap->FileObject->FileName, NULL );
|
||
}
|
||
}
|
||
|
||
CcLogError( SharedCacheMap->FileObject,
|
||
( Status == STATUS_SUCCESS ?
|
||
&FileNameInfo->Name :
|
||
&SharedCacheMap->FileObject->FileName ),
|
||
IO_LOST_DELAYED_WRITE,
|
||
IoStatus->Status,
|
||
IRP_MJ_WRITE );
|
||
|
||
if (FileNameInfo) {
|
||
ExFreePool(FileNameInfo);
|
||
}
|
||
|
||
//
|
||
    // See if there are any deferred writes we can post.
|
||
//
|
||
|
||
} else if (!IsListEmpty(&CcDeferredWrites)) {
|
||
CcPostDeferredWrites();
|
||
}
|
||
|
||
//
|
||
// Now acquire BcbSpinLock again to check for ValidData updates.
|
||
//
|
||
|
||
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
|
||
//
|
||
    // If the current ValidDataGoal is greater than (or equal to) ValidDataLength,
|
||
// then we must see if we have advanced beyond the current ValidDataLength.
|
||
//
|
||
// If we have NEVER written anything out from this shared cache map, then
|
||
    // there is no need to check anything associated with valid data length
|
||
// here. We will come by here again when, and if, anybody actually
|
||
// modifies the file and we lazy write some data.
|
||
//
|
||
|
||
Status = STATUS_SUCCESS;
|
||
if (FlagOn(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED) &&
|
||
(SharedCacheMap->ValidDataGoal.QuadPart >= SharedCacheMap->ValidDataLength.QuadPart) &&
|
||
(SharedCacheMap->ValidDataLength.QuadPart != MAXLONGLONG) &&
|
||
(SharedCacheMap->FileSize.QuadPart != 0)) {
|
||
|
||
LARGE_INTEGER NewValidDataLength;
|
||
|
||
NewValidDataLength = CcGetFlushedValidData( SharedCacheMap->FileObject->SectionObjectPointer,
|
||
TRUE );
|
||
|
||
//
|
||
// If New ValidDataLength has been written, then we have to
|
||
// call the file system back to update it. We must temporarily
|
||
// drop our global list while we do this, which is safe to do since
|
||
// we have not cleared WRITE_QUEUED.
|
||
//
|
||
// Note we keep calling any time we wrote the last page of the file,
|
||
// to solve the "famous" AFS Server problem. The file system will
|
||
// truncate our valid data call to whatever is currently valid. But
|
||
// then if he writes a little more, we do not want to stop calling
|
||
// back.
|
||
//
|
||
|
||
if ( NewValidDataLength.QuadPart >= SharedCacheMap->ValidDataLength.QuadPart ) {
|
||
|
||
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
||
|
||
//
|
||
// Call file system to set new valid data. We have no
|
||
// one to tell if this doesn't work.
|
||
//
|
||
|
||
Status = CcSetValidData( SharedCacheMap->FileObject,
|
||
&NewValidDataLength );
|
||
|
||
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
if (NT_SUCCESS(Status)) {
|
||
SharedCacheMap->ValidDataLength = NewValidDataLength;
|
||
#ifdef TOMM
|
||
} else if ((Status != STATUS_INSUFFICIENT_RESOURCES) && !RetryError(Status)) {
|
||
DbgPrint("Unexpected status from CcSetValidData: %08lx, FileObject: %08lx\n",
|
||
Status,
|
||
SharedCacheMap->FileObject);
|
||
DbgBreakPoint();
|
||
#endif TOMM
|
||
}
|
||
}
|
||
}
|
||
|
||
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
||
|
||
//
|
||
// Show we are done.
|
||
//
|
||
|
||
CcAcquireMasterLock( &LockHandle.OldIrql );
|
||
CcDecrementOpenCount( SharedCacheMap, 'brWF' );
|
||
|
||
//
|
||
// Make an approximate guess about whether we will call CcDeleteSharedCacheMap or not
|
||
// to truncate the file.
|
||
//
|
||
// Also do not delete the SharedCacheMap if we got an error on the ValidDataLength
|
||
// callback. If we get a resource allocation failure or a retryable error (due to
|
||
// log file full?), we have no one to tell, so we must just loop back and try again.
|
||
// Of course all I/O errors are just too bad.
|
||
//
|
||
|
||
if ((SharedCacheMap->OpenCount == 0)
|
||
|
||
&&
|
||
|
||
(NT_SUCCESS(Status) || ((Status != STATUS_INSUFFICIENT_RESOURCES) && !RetryError(Status)))) {
|
||
|
||
CcReleaseMasterLock( LockHandle.OldIrql );
|
||
FsRtlAcquireFileExclusive( SharedCacheMap->FileObject );
|
||
CcAcquireMasterLock( &LockHandle.OldIrql );
|
||
|
||
//
|
||
// Now really see if we are to delete this SharedCacheMap. By having released
|
||
// first we avoid a deadlock with the file system when the FileObject is
|
||
// dereferenced. Note that CcDeleteSharedCacheMap requires that the
|
||
// CcMasterSpinLock already be acquired, and it releases it.
|
||
//
|
||
// Note that we must retest since we dropped and reacquired the master
|
||
// lock.
|
||
//
|
||
|
||
if ((SharedCacheMap->OpenCount == 0)
|
||
|
||
&&
|
||
|
||
((SharedCacheMap->DirtyPages == 0) || ((SharedCacheMap->FileSize.QuadPart == 0) &&
|
||
!FlagOn(SharedCacheMap->Flags, PIN_ACCESS)))) {
|
||
|
||
//
|
||
// Make sure to drop the requeue flag in case the write hit the timeout at
|
||
// the same time it finished everything up.
|
||
//
|
||
|
||
CcDeleteSharedCacheMap( SharedCacheMap, LockHandle.OldIrql, TRUE );
|
||
IoStatus->Information = 0;
|
||
SharedCacheMap = NULL;
|
||
|
||
} else {
|
||
|
||
CcReleaseMasterLock( LockHandle.OldIrql );
|
||
FsRtlReleaseFile( SharedCacheMap->FileObject );
|
||
CcAcquireMasterLock( &LockHandle.OldIrql );
|
||
}
|
||
}
|
||
|
||
//
|
||
// In the normal case, we just clear the flag on the way out if
|
||
// we will not requeue the workitem.
|
||
//
|
||
|
||
if (SharedCacheMap != NULL) {
|
||
|
||
if (IoStatus->Information != CC_REQUEUE) {
|
||
ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED);
|
||
}
|
||
CcReleaseMasterLock( LockHandle.OldIrql );
|
||
}
|
||
|
||
DebugTrace(-1, me, "CcWriteBehind->VOID\n", 0 );
|
||
|
||
return;
|
||
}
|
||
|
||
|
||
LARGE_INTEGER
|
||
CcGetFlushedValidData (
|
||
IN PSECTION_OBJECT_POINTERS SectionObjectPointer,
|
||
IN BOOLEAN CcInternalCaller
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine may be called by a file system to find out how far the Cache Manager
|
||
has flushed in the stream.  More accurately, this routine returns the FileOffset
of the lowest dirty page currently in the file, or ValidDataGoal if the stream
is entirely clean.
|
||
|
||
NOTE that even though the routine takes SectionObjectPointer, the caller must insure
|
||
that the stream is cached and stays cached for the duration of this routine, much like
|
||
for the copy routines, etc.
|
||
|
||
Arguments:
|
||
|
||
SectionObjectPointer - A pointer to the Section Object Pointers
|
||
structure in the nonpaged Fcb.
|
||
|
||
CcInternalCaller - must be TRUE if the caller is coming from Cc, FALSE otherwise.
|
||
TRUE implies the need for self-synchronization.
|
||
|
||
Return Value:
|
||
|
||
The derived number for flushed ValidData, or MAXLONGLONG in the quad part if
|
||
the Section is not cached. (Naturally the caller can guarantee that this case
|
||
does not occur, and internal callers do.)
|
||
|
||
--*/
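//
//  Illustrative sketch only (not part of this module): a file system that
//  keeps the stream cached might query the flushed valid data and, if it
//  has advanced, record the new value in its on-disk structures.  "Fcb",
//  its "SectionObjectPointers" and "ValidDataLengthOnDisk" fields, and
//  "FsUpdateValidDataOnDisk" are hypothetical names used only here.
//
//      LARGE_INTEGER Flushed;
//
//      Flushed = CcGetFlushedValidData( &Fcb->SectionObjectPointers, FALSE );
//
//      if ((Flushed.QuadPart != MAXLONGLONG) &&
//          (Flushed.QuadPart > Fcb->ValidDataLengthOnDisk.QuadPart)) {
//
//          FsUpdateValidDataOnDisk( Fcb, &Flushed );
//      }
//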
|
||
|
||
{
|
||
PSHARED_CACHE_MAP SharedCacheMap;
|
||
KLOCK_QUEUE_HANDLE LockHandle;
|
||
LARGE_INTEGER NewValidDataLength;
|
||
|
||
//
|
||
// External callers may be unsynchronized with this shared cache map
|
||
// perhaps going away underneath this call. NTFS and his
|
||
// pair of streams for compression-on-the-wire is a good example of
|
||
// someone who may be synchronized in one stream but needs to peek at
|
||
// the other.
|
||
//
|
||
|
||
if (!CcInternalCaller) {
|
||
|
||
CcAcquireMasterLock( &LockHandle.OldIrql );
|
||
|
||
SharedCacheMap = SectionObjectPointer->SharedCacheMap;
|
||
|
||
if (SharedCacheMap == NULL) {
|
||
CcReleaseMasterLock( LockHandle.OldIrql );
|
||
NewValidDataLength.QuadPart = MAXLONGLONG;
|
||
return NewValidDataLength;
|
||
}
|
||
|
||
CcIncrementOpenCount( SharedCacheMap, 'dfGS' );
|
||
CcReleaseMasterLock( LockHandle.OldIrql );
|
||
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
|
||
} else {
|
||
|
||
SharedCacheMap = SectionObjectPointer->SharedCacheMap;
|
||
}
|
||
|
||
ASSERT( SharedCacheMap != NULL );
|
||
|
||
//
|
||
// If the file is entirely clean, then we wish to return
|
||
// the new ValidDataLength as equal to ValidDataGoal.
|
||
//
|
||
|
||
NewValidDataLength = SharedCacheMap->ValidDataGoal;
|
||
|
||
//
|
||
// If there may be dirty pages we will look at the last Bcb in the
|
||
// descending-order Bcb list, and see if it describes data beyond
|
||
// ValidDataGoal.
|
||
//
|
||
// It is important to note that since we use DirtyPages as a faux
|
||
// reference count over some short windows (+1, -1) the simple
|
||
// fact it is nonzero does *not* mean the file is dirty.
|
||
//
|
||
// (This test is logically too conservative. For example, the last Bcb
|
||
// may not even be dirty (in which case we should look at its
|
||
// predecessor), or we may have earlier written valid data to this
|
||
// byte range (which also means if we knew this we could look at
|
||
// the predecessor).  This simply means that the Lazy Writer may not
|
||
// successfully get ValidDataLength updated in a file being randomly
|
||
// accessed until the level of file access dies down, or at the latest
|
||
// until the file is closed. However, security will never be
|
||
// compromised.)
|
||
//
|
||
|
||
if (SharedCacheMap->DirtyPages) {
|
||
|
||
PBITMAP_RANGE BitmapRange;
|
||
PBCB LastBcb;
|
||
PMBCB Mbcb = SharedCacheMap->Mbcb;
|
||
|
||
if ((Mbcb != NULL) && (Mbcb->DirtyPages != 0)) {
|
||
|
||
BitmapRange = CcFindBitmapRangeToClean( Mbcb, 0 );
|
||
|
||
ASSERT(BitmapRange->FirstDirtyPage != MAXULONG);
|
||
|
||
NewValidDataLength.QuadPart = (BitmapRange->BasePage + BitmapRange->FirstDirtyPage)
|
||
<< PAGE_SHIFT;
|
||
}
|
||
|
||
LastBcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Flink,
|
||
BCB,
|
||
BcbLinks );
|
||
|
||
while (&LastBcb->BcbLinks != &SharedCacheMap->BcbList) {
|
||
|
||
if ((LastBcb->NodeTypeCode == CACHE_NTC_BCB) && LastBcb->Dirty) {
|
||
break;
|
||
}
|
||
|
||
LastBcb = CONTAINING_RECORD( LastBcb->BcbLinks.Flink,
|
||
BCB,
|
||
BcbLinks );
|
||
}
|
||
|
||
//
|
||
// Check the Base of the last entry.
|
||
//
|
||
|
||
if ((&LastBcb->BcbLinks != &SharedCacheMap->BcbList) &&
|
||
(LastBcb->FileOffset.QuadPart < NewValidDataLength.QuadPart )) {
|
||
|
||
NewValidDataLength = LastBcb->FileOffset;
|
||
}
|
||
}
|
||
|
||
if (!CcInternalCaller) {
|
||
|
||
//
|
||
// Remove our reference.
|
||
//
|
||
|
||
CcAcquireMasterLockAtDpcLevel();
|
||
CcDecrementOpenCount( SharedCacheMap, 'dfGF' );
|
||
|
||
if ((SharedCacheMap->OpenCount == 0) &&
|
||
!FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
|
||
(SharedCacheMap->DirtyPages == 0)) {
|
||
|
||
//
|
||
// Move to the dirty list.
|
||
//
|
||
|
||
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
||
InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
|
||
&SharedCacheMap->SharedCacheMapLinks );
|
||
|
||
//
|
||
// Make sure the Lazy Writer will wake up, because we
|
||
// want him to delete this SharedCacheMap.
|
||
//
|
||
|
||
LazyWriter.OtherWork = TRUE;
|
||
if (!LazyWriter.ScanActive) {
|
||
CcScheduleLazyWriteScan( FALSE );
|
||
}
|
||
}
|
||
|
||
KeReleaseInStackQueuedSpinLockFromDpcLevel( &LockHandle );
|
||
CcReleaseMasterLock( LockHandle.OldIrql );
|
||
}
|
||
|
||
return NewValidDataLength;
|
||
}
|
||
|
||
|
||
VOID
|
||
CcFlushCache (
|
||
IN PSECTION_OBJECT_POINTERS SectionObjectPointer,
|
||
IN PLARGE_INTEGER FileOffset OPTIONAL,
|
||
IN ULONG Length,
|
||
OUT PIO_STATUS_BLOCK IoStatus OPTIONAL
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine may be called to flush dirty data from the cache to the
|
||
cached file on disk. Any byte range within the file may be flushed,
|
||
or the entire file may be flushed by omitting the FileOffset parameter.
|
||
|
||
This routine does not take a Wait parameter; the caller should assume
|
||
that it will always block.
|
||
|
||
Arguments:
|
||
|
||
SectionObjectPointer - A pointer to the Section Object Pointers
|
||
structure in the nonpaged Fcb.
|
||
|
||
FileOffset - If this parameter is supplied (not NULL), then only the
|
||
byte range specified by FileOffset and Length are flushed.
|
||
If &CcNoDelay is specified, then this signifies the call
|
||
from the Lazy Writer, and the lazy write scan should resume
|
||
as normal from the last spot where it left off in the file.
|
||
|
||
Length - Defines the length of the byte range to flush, starting at
|
||
FileOffset. This parameter is ignored if FileOffset is
|
||
specified as NULL.
|
||
|
||
IoStatus - The I/O status resulting from the flush operation.
|
||
|
||
Return Value:
|
||
|
||
None.
|
||
|
||
--*/
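//
//  Illustrative sketch only (not part of this module): a file system
//  flushing the first 64K of a cached stream and checking the result.
//  "Fcb" and its "SectionObjectPointers" field are hypothetical names
//  used only for this example.
//
//      IO_STATUS_BLOCK IoStatus;
//      LARGE_INTEGER FlushOffset;
//
//      FlushOffset.QuadPart = 0;
//
//      CcFlushCache( &Fcb->SectionObjectPointers,
//                    &FlushOffset,
//                    0x10000,
//                    &IoStatus );
//
//      if (!NT_SUCCESS( IoStatus.Status )) {
//
//          //  Handle or propagate the flush failure.
//      }
//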
|
||
|
||
{
|
||
LARGE_INTEGER NextFileOffset, TargetOffset;
|
||
ULONG NextLength;
|
||
PBCB FirstBcb;
|
||
KIRQL OldIrql;
|
||
PSHARED_CACHE_MAP SharedCacheMap;
|
||
IO_STATUS_BLOCK TrashStatus;
|
||
PVOID TempVa;
|
||
ULONG RemainingLength, TempLength;
|
||
NTSTATUS PopupStatus;
|
||
LOGICAL HotSpot;
|
||
ULONG BytesWritten = 0;
|
||
LOGICAL PopupRequired = FALSE;
|
||
LOGICAL VerifyRequired = FALSE;
|
||
LOGICAL IsLazyWriter = FALSE;
|
||
LOGICAL FreeActiveVacb = FALSE;
|
||
PVACB ActiveVacb = NULL;
|
||
NTSTATUS Status = STATUS_SUCCESS;
|
||
LARGE_INTEGER EndTick, CurrentTick;
|
||
|
||
DebugTrace(+1, me, "CcFlushCache:\n", 0 );
|
||
DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer );
|
||
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n",
|
||
ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart
|
||
: 0,
|
||
ARGUMENT_PRESENT(FileOffset) ? FileOffset->HighPart
|
||
: 0 );
|
||
DebugTrace( 0, me, " Length = %08lx\n", Length );
|
||
|
||
//
|
||
// If IoStatus passed a Null pointer, set up to throw status away.
|
||
//
|
||
|
||
if (!ARGUMENT_PRESENT(IoStatus)) {
|
||
IoStatus = &TrashStatus;
|
||
}
|
||
IoStatus->Status = STATUS_SUCCESS;
|
||
IoStatus->Information = 0;
|
||
|
||
//
|
||
// See if this is the Lazy Writer. Since he wants to use this common
|
||
// routine, which is also a public routine callable by file systems,
|
||
// the Lazy Writer shows his call by specifying CcNoDelay as the file offset!
|
||
//
|
||
// Also, in case we do not write anything because we see only HotSpot(s),
|
||
// initialize the Status to indicate a retryable error, so CcWorkerThread
|
||
// knows we did not make any progress. Of course any actual flush will
|
||
// overwrite this code.
|
||
//
|
||
|
||
if (FileOffset == &CcNoDelay) {
|
||
IoStatus->Status = STATUS_VERIFY_REQUIRED;
|
||
IsLazyWriter = TRUE;
|
||
FileOffset = NULL;
|
||
}
|
||
|
||
CcAcquireMasterLock( &OldIrql );
|
||
|
||
SharedCacheMap = SectionObjectPointer->SharedCacheMap;
|
||
|
||
//
|
||
// Awareness is indicated by the lowbit of the FileOffset pointer.
|
||
// Non-awareness of a private write stream results in a no-op.
|
||
//
|
||
|
||
if ((SharedCacheMap != NULL) && FlagOn( SharedCacheMap->Flags, PRIVATE_WRITE )) {
|
||
|
||
if (((ULONG_PTR)FileOffset & 1) == 0) {
|
||
|
||
CcReleaseMasterLock( OldIrql );
|
||
return;
|
||
|
||
}
|
||
|
||
FileOffset = (PLARGE_INTEGER)((ULONG_PTR)FileOffset ^ 1);
|
||
|
||
}
|
||
|
||
//
|
||
// If there is nothing to do, return here.
|
||
//
|
||
|
||
if (ARGUMENT_PRESENT(FileOffset) && (Length == 0)) {
|
||
|
||
CcReleaseMasterLock( OldIrql );
|
||
DebugTrace(-1, me, "CcFlushCache -> VOID\n", 0 );
|
||
return;
|
||
}
|
||
|
||
//
|
||
// See if the file is cached.
|
||
//
|
||
|
||
if (SharedCacheMap != NULL) {
|
||
|
||
//
|
||
// Increment the open count to keep it from going away.
|
||
//
|
||
|
||
CcIncrementOpenCount( SharedCacheMap, 'fcCS' );
|
||
|
||
if ((SharedCacheMap->NeedToZero != NULL) || (SharedCacheMap->ActiveVacb != NULL)) {
|
||
|
||
ULONG FirstPage = 0;
|
||
ULONG LastPage = MAXULONG;
|
||
|
||
if (ARGUMENT_PRESENT(FileOffset)) {
|
||
|
||
FirstPage = (ULONG)(FileOffset->QuadPart >> PAGE_SHIFT);
|
||
LastPage = (ULONG)((FileOffset->QuadPart + Length - 1) >> PAGE_SHIFT);
|
||
}
|
||
|
||
//
|
||
// Make sure we do not flush the active page without zeroing any
|
||
// uninitialized data. Also, it is very important to free the active
|
||
// page if it is the one to be flushed, so that we get the dirty
|
||
// bit out to the Pfn.
|
||
//
|
||
|
||
if (((((LONGLONG)LastPage + 1) << PAGE_SHIFT) > SharedCacheMap->ValidDataGoal.QuadPart) ||
|
||
|
||
((SharedCacheMap->NeedToZero != NULL) &&
|
||
(FirstPage <= SharedCacheMap->NeedToZeroPage) &&
|
||
(LastPage >= SharedCacheMap->NeedToZeroPage)) ||
|
||
|
||
((SharedCacheMap->ActiveVacb != NULL) &&
|
||
(FirstPage <= SharedCacheMap->ActivePage) &&
|
||
(LastPage >= SharedCacheMap->ActivePage))) {
|
||
|
||
GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, RemainingLength, TempLength );
|
||
FreeActiveVacb = TRUE;
|
||
}
|
||
}
|
||
}
|
||
|
||
CcReleaseMasterLock( OldIrql );
|
||
|
||
if (FreeActiveVacb) {
|
||
CcFreeActiveVacb( SharedCacheMap, ActiveVacb, RemainingLength, TempLength );
|
||
}
|
||
|
||
//
|
||
// If there is a user-mapped file, then we perform the "service" of
|
||
// flushing even data not written via the file system. Note that this
|
||
// is pretty important for folks provoking the flush/purge of a coherency
|
||
// operation.
|
||
//
|
||
// It is critical this happen before we examine our own hints. In the course
|
||
// of this flush it is possible valid data length will be advanced by the
|
||
// underlying filesystem, with CcZero'ing behind - which will cause us to
|
||
// make some dirty zeroes in the cache. Syscache bug! Note how coherency
|
||
// flushing works ...
|
||
//
|
||
|
||
if ((SharedCacheMap == NULL)
|
||
|
||
||
|
||
|
||
FlagOn(((PFSRTL_COMMON_FCB_HEADER)(SharedCacheMap->FileObject->FsContext))->Flags,
|
||
FSRTL_FLAG_USER_MAPPED_FILE) && !IsLazyWriter) {
|
||
|
||
//
|
||
// Call MM to flush the section through our view.
|
||
//
|
||
|
||
DebugTrace( 0, mm, "MmFlushSection:\n", 0 );
|
||
DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer );
|
||
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n",
|
||
ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart
|
||
: 0,
|
||
ARGUMENT_PRESENT(FileOffset) ? FileOffset->HighPart
|
||
: 0 );
|
||
DebugTrace( 0, mm, " RegionSize = %08lx\n", Length );
|
||
|
||
Status = MmFlushSection( SectionObjectPointer,
|
||
FileOffset,
|
||
Length,
|
||
IoStatus,
|
||
TRUE );
|
||
|
||
if ((!NT_SUCCESS(IoStatus->Status)) && !RetryError(IoStatus->Status)) {
|
||
|
||
PopupRequired = TRUE;
|
||
PopupStatus = IoStatus->Status;
|
||
}
|
||
|
||
DebugTrace2(0, mm, " <IoStatus = %08lx, %08lx\n",
|
||
IoStatus->Status, IoStatus->Information );
|
||
}
|
||
|
||
//
|
||
// Scan for dirty pages if there is a shared cache map.
|
||
//
|
||
|
||
if (SharedCacheMap != NULL) {
|
||
|
||
//
|
||
// If FileOffset was not specified then set to flush entire region
|
||
// and set valid data length to the goal so that we will not get
|
||
// any more call backs.
|
||
//
|
||
|
||
if (!IsLazyWriter && !ARGUMENT_PRESENT(FileOffset)) {
|
||
|
||
SharedCacheMap->ValidDataLength = SharedCacheMap->ValidDataGoal;
|
||
}
|
||
|
||
//
|
||
// If this is an explicit flush, initialize our offset to scan for.
|
||
//
|
||
|
||
if (ARGUMENT_PRESENT(FileOffset)) {
|
||
TargetOffset = *FileOffset;
|
||
}
|
||
|
||
//
|
||
// Assume we want to pass the explicit flush flag in Length.
|
||
// But overwrite it if a length really was specified. On
|
||
// subsequent loops, NextLength will have some nonzero value.
|
||
//
|
||
|
||
NextLength = 1;
|
||
if (Length != 0) {
|
||
NextLength = Length;
|
||
}
|
||
|
||
//
|
||
// Now calculate the tick that will signal the expiration of a
|
||
// lazy writer tick interval.
|
||
//
|
||
|
||
if (IsLazyWriter) {
|
||
|
||
KeQueryTickCount( &EndTick );
|
||
EndTick.QuadPart += CcIdleDelayTick;
|
||
}
|
||
|
||
//
|
||
// Loop as long as we find buffers to flush for this
|
||
// SharedCacheMap, and we are not trying to delete the guy.
|
||
//
|
||
|
||
while (((SharedCacheMap->PagesToWrite != 0) || !IsLazyWriter)
|
||
|
||
&&
|
||
((SharedCacheMap->FileSize.QuadPart != 0) ||
|
||
FlagOn(SharedCacheMap->Flags, PIN_ACCESS))
|
||
|
||
&&
|
||
|
||
!VerifyRequired
|
||
|
||
&&
|
||
|
||
CcAcquireByteRangeForWrite ( SharedCacheMap,
|
||
IsLazyWriter ? NULL : (ARGUMENT_PRESENT(FileOffset) ?
|
||
&TargetOffset : NULL),
|
||
IsLazyWriter ? 0: NextLength,
|
||
&NextFileOffset,
|
||
&NextLength,
|
||
&FirstBcb )) {
|
||
|
||
//
|
||
// Assume this range is not a hot spot.
|
||
//
|
||
|
||
HotSpot = FALSE;
|
||
|
||
//
|
||
// We defer calling Mm to set address range modified until here, to take
|
||
// overhead out of the main line path, and to reduce the number of TBIS
|
||
// on a multiprocessor.
|
||
//
|
||
|
||
RemainingLength = NextLength;
|
||
|
||
do {
|
||
|
||
//
|
||
// See if the next file offset is mapped. (If not, the dirty bit
|
||
// was propagated on the unmap.)
|
||
//
|
||
|
||
if ((TempVa = CcGetVirtualAddressIfMapped( SharedCacheMap,
|
||
NextFileOffset.QuadPart + NextLength - RemainingLength,
|
||
&ActiveVacb,
|
||
&TempLength)) != NULL) {
|
||
|
||
//
|
||
// Reduce TempLength to RemainingLength if necessary, and
|
||
// call MM.
|
||
//
|
||
|
||
if (TempLength > RemainingLength) {
|
||
TempLength = RemainingLength;
|
||
}
|
||
|
||
//
|
||
// Clear the Dirty bit (if set) in the PTE and set the
|
||
// Pfn modified. Assume if the Pte was dirty, that this may
|
||
// be a hot spot. Do not do hot spots for metadata, and unless
|
||
// they are within ValidDataLength as reported to the file system
|
||
// via CcSetValidData.
|
||
//
|
||
|
||
HotSpot = (BOOLEAN)(((MmSetAddressRangeModified(TempVa, TempLength) || HotSpot) &&
|
||
((NextFileOffset.QuadPart + NextLength) <
|
||
(SharedCacheMap->ValidDataLength.QuadPart)) &&
|
||
((SharedCacheMap->LazyWritePassCount & 0xF) != 0) &&
|
||
IsLazyWriter) &&
|
||
!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED));
|
||
|
||
CcFreeVirtualAddress( ActiveVacb );
|
||
|
||
} else {
|
||
|
||
//
|
||
// Reduce TempLength to RemainingLength if necessary.
|
||
//
|
||
|
||
if (TempLength > RemainingLength) {
|
||
TempLength = RemainingLength;
|
||
}
|
||
}
|
||
|
||
//
|
||
// Reduce RemainingLength by what we processed.
|
||
//
|
||
|
||
RemainingLength -= TempLength;
|
||
|
||
//
|
||
// Loop until done.
|
||
//
|
||
|
||
} while (RemainingLength != 0);
|
||
|
||
CcLazyWriteHotSpots += HotSpot;
|
||
|
||
//
|
||
// Now flush, if we do not think it is a hot spot.
|
||
//
|
||
|
||
if (!HotSpot) {
|
||
|
||
MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer,
|
||
&NextFileOffset,
|
||
NextLength,
|
||
IoStatus,
|
||
!IsLazyWriter );
|
||
|
||
if (NT_SUCCESS(IoStatus->Status)) {
|
||
|
||
if (!FlagOn(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED)) {
|
||
|
||
CcAcquireMasterLock( &OldIrql );
|
||
SetFlag(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED);
|
||
CcReleaseMasterLock( OldIrql );
|
||
}
|
||
|
||
//
|
||
// Increment performance counters
|
||
//
|
||
|
||
if (IsLazyWriter) {
|
||
|
||
CcLazyWriteIos += 1;
|
||
CcLazyWritePages += (NextLength + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||
}
|
||
|
||
} else {
|
||
|
||
LARGE_INTEGER Offset = NextFileOffset;
|
||
ULONG RetryLength = NextLength;
|
||
|
||
DebugTrace2( 0, 0, "I/O Error on Cache Flush: %08lx, %08lx\n",
|
||
IoStatus->Status, IoStatus->Information );
|
||
|
||
if (RetryError(IoStatus->Status)) {
|
||
|
||
VerifyRequired = TRUE;
|
||
|
||
//
|
||
// Loop to write each page individually, starting with one
|
||
// more try on the page that got the error, in case that page
|
||
// or any page beyond it can be successfully written
|
||
// individually. Note that Offset and RetryLength are
|
||
// guaranteed to be in integral pages, but the Information
|
||
// field from the failed request is not.
|
||
//
|
||
// We ignore errors now, and give it one last shot, before
|
||
// setting the pages clean (see below).
|
||
//
|
||
|
||
} else {
|
||
|
||
do {
|
||
|
||
DebugTrace2( 0, 0, "Trying page at offset %08lx, %08lx\n",
|
||
Offset.LowPart, Offset.HighPart );
|
||
|
||
MmFlushSection ( SharedCacheMap->FileObject->SectionObjectPointer,
|
||
&Offset,
|
||
PAGE_SIZE,
|
||
IoStatus,
|
||
!IsLazyWriter );
|
||
|
||
DebugTrace2( 0, 0, "I/O status = %08lx, %08lx\n",
|
||
IoStatus->Status, IoStatus->Information );
|
||
|
||
if (NT_SUCCESS(IoStatus->Status)) {
|
||
CcAcquireMasterLock( &OldIrql );
|
||
SetFlag(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED);
|
||
CcReleaseMasterLock( OldIrql );
|
||
}
|
||
|
||
if ((!NT_SUCCESS(IoStatus->Status)) && !RetryError(IoStatus->Status)) {
|
||
|
||
PopupRequired = TRUE;
|
||
PopupStatus = IoStatus->Status;
|
||
}
|
||
|
||
VerifyRequired = VerifyRequired || RetryError(IoStatus->Status);
|
||
|
||
Offset.QuadPart = Offset.QuadPart + (LONGLONG)PAGE_SIZE;
|
||
RetryLength -= PAGE_SIZE;
|
||
|
||
} while(RetryLength > 0);
|
||
}
|
||
}
|
||
}
|
||
|
||
//
|
||
// Now release the Bcb resources and set them clean. Note we do not check
|
||
// here for errors; they are just returned in the I/O status.  Errors on writes
|
||
// are rare to begin with. Nonetheless, our strategy is to rely on
|
||
// one or more of the following (depending on the file system) to prevent
|
||
// errors from getting to us.
|
||
//
|
||
// - Retries and/or other forms of error recovery in the disk driver
|
||
// - Mirroring driver
|
||
// - Hot fixing in the noncached path of the file system
|
||
//
|
||
// In the unexpected case that a write error does get through, we
|
||
// *currently* just set the Bcbs clean anyway, rather than let
|
||
// Bcbs and pages accumulate which cannot be written. Note we did
|
||
// a popup above to at least notify the guy.
|
||
//
|
||
// Set the pages dirty again if we either saw a HotSpot or got
|
||
// verify required.
|
||
//
|
||
|
||
CcReleaseByteRangeFromWrite ( SharedCacheMap,
|
||
&NextFileOffset,
|
||
NextLength,
|
||
FirstBcb,
|
||
(BOOLEAN)(HotSpot || VerifyRequired) );
|
||
|
||
//
|
||
// See if there are any deferred writes we should post.
|
||
//
|
||
|
||
BytesWritten += NextLength;
|
||
if ((BytesWritten >= 0x40000) && !IsListEmpty(&CcDeferredWrites)) {
|
||
CcPostDeferredWrites();
|
||
BytesWritten = 0;
|
||
}
|
||
|
||
//
|
||
// If we're the lazy writer and have spent more than the active tick
|
||
// length in this loop, break out for a requeue so we share the
|
||
// file resources.
|
||
//
|
||
|
||
if (IsLazyWriter) {
|
||
|
||
KeQueryTickCount( &CurrentTick );
|
||
|
||
if (CurrentTick.QuadPart > EndTick.QuadPart) {
|
||
IoStatus->Information = CC_REQUEUE;
|
||
break;
|
||
}
|
||
}
|
||
|
||
//
|
||
// Now for explicit flushes, we should advance our range.
|
||
//
|
||
|
||
if (ARGUMENT_PRESENT(FileOffset)) {
|
||
|
||
NextFileOffset.QuadPart += NextLength;
|
||
|
||
//
|
||
// Done yet?
|
||
//
|
||
|
||
if ((FileOffset->QuadPart + Length) <= NextFileOffset.QuadPart) {
|
||
break;
|
||
}
|
||
|
||
//
|
||
// Calculate new target range
|
||
//
|
||
|
||
NextLength = (ULONG)((FileOffset->QuadPart + Length) - NextFileOffset.QuadPart);
|
||
TargetOffset = NextFileOffset;
|
||
}
|
||
}
|
||
}
|
||
|
||
//
|
||
// See if there are any deferred writes we should post if
|
||
// we escaped the loop without checking after a series of
|
||
// flushes.
|
||
//
|
||
|
||
if (BytesWritten != 0 && !IsListEmpty(&CcDeferredWrites)) {
|
||
|
||
CcPostDeferredWrites();
|
||
}
|
||
|
||
//
|
||
// Now we can get rid of the open count, and clean up as required.
|
||
//
|
||
|
||
if (SharedCacheMap != NULL) {
|
||
|
||
//
|
||
// Serialize again to decrement the open count.
|
||
//
|
||
|
||
CcAcquireMasterLock( &OldIrql );
|
||
|
||
CcDecrementOpenCount( SharedCacheMap, 'fcCF' );
|
||
|
||
if ((SharedCacheMap->OpenCount == 0) &&
|
||
!FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
|
||
(SharedCacheMap->DirtyPages == 0)) {
|
||
|
||
//
|
||
// Move to the dirty list.
|
||
//
|
||
|
||
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
||
InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
|
||
&SharedCacheMap->SharedCacheMapLinks );
|
||
|
||
//
|
||
// Make sure the Lazy Writer will wake up, because we
|
||
// want him to delete this SharedCacheMap.
|
||
//
|
||
|
||
LazyWriter.OtherWork = TRUE;
|
||
if (!LazyWriter.ScanActive) {
|
||
CcScheduleLazyWriteScan( FALSE );
|
||
}
|
||
}
|
||
|
||
CcReleaseMasterLock( OldIrql );
|
||
}
|
||
|
||
//
|
||
// Make sure to return the first error to our caller.  In the
|
||
// case of the Lazy Writer, a popup will be issued.
|
||
//
|
||
|
||
if (PopupRequired) {
|
||
IoStatus->Status = PopupStatus;
|
||
}
|
||
|
||
DebugTrace(-1, me, "CcFlushCache -> VOID\n", 0 );
|
||
|
||
return;
|
||
}
|
||
|
||
|
||
PVOID
|
||
CcRemapBcb (
|
||
IN PVOID Bcb
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine may be called by a file system to map a Bcb an additional
|
||
time in order to preserve it through several calls that perform additional
|
||
maps and unpins.
|
||
|
||
|
||
Arguments:
|
||
|
||
Bcb - Supplies a pointer to a previously returned Bcb.
|
||
|
||
Return Value:
|
||
|
||
Bcb with read-only indicator.
|
||
|
||
--*/
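//
//  Illustrative sketch only (not part of this module): preserving a view
//  of pinned data across an unpin by taking an additional mapping first.
//  The extra mapping is typically released later with CcUnpinData.
//
//      PVOID RemappedBcb;
//
//      RemappedBcb = CcRemapBcb( Bcb );
//
//      CcUnpinData( Bcb );                 //  original pin is released
//
//      //  ... the data remains mapped here ...
//
//      CcUnpinData( RemappedBcb );         //  release the extra mapping
//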
|
||
|
||
{
|
||
KIRQL OldIrql;
|
||
PVACB Vacb;
|
||
|
||
//
|
||
// Remove read-only bit
|
||
//
|
||
|
||
Bcb = (PVOID) ((ULONG_PTR)Bcb & ~1);
|
||
|
||
if (((PBCB)Bcb)->NodeTypeCode == CACHE_NTC_OBCB) {
|
||
|
||
//
|
||
// If this is an overlapped BCB, use the first Vacb in the
|
||
// array
|
||
//
|
||
|
||
Vacb = ((POBCB)Bcb)->Bcbs[0]->Vacb;
|
||
|
||
} else if (((PBCB)Bcb)->NodeTypeCode == CACHE_NTC_BCB) {
|
||
|
||
//
|
||
// If this is a BCB, extract the Vacb from it
|
||
//
|
||
|
||
Vacb = ((PBCB)Bcb)->Vacb;
|
||
|
||
} else {
|
||
|
||
//
|
||
// Otherwise, there is no signature to match. Assume
|
||
// it is a Vacb.
|
||
//
|
||
|
||
Vacb = (PVACB) Bcb;
|
||
}
|
||
|
||
ASSERT((Vacb >= CcVacbs) && (Vacb < CcBeyondVacbs));
|
||
|
||
//
|
||
// Safely bump the active count
|
||
//
|
||
|
||
CcAcquireVacbLock( &OldIrql );
|
||
|
||
Vacb->Overlay.ActiveCount += 1;
|
||
|
||
CcReleaseVacbLock( OldIrql );
|
||
|
||
return (PVOID) ((ULONG_PTR)Vacb | 1);
|
||
}
|
||
|
||
|
||
VOID
|
||
CcRepinBcb (
|
||
IN PVOID Bcb
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine may be called by a file system to pin a Bcb an additional
|
||
time in order to reserve it for Write Through or error recovery.
|
||
Typically the file system would do this the first time that it sets a
|
||
pinned buffer dirty while processing a WriteThrough request, or any
|
||
time that it determines that a buffer will be required for WriteThrough.
|
||
|
||
The call to this routine must be followed by a call to CcUnpinRepinnedBcb.
|
||
CcUnpinRepinnedBcb should normally be called during request completion
|
||
after all other resources have been released. CcUnpinRepinnedBcb
|
||
synchronously writes the buffer (for WriteThrough requests) and performs
|
||
the matching unpin for this call.
|
||
|
||
Arguments:
|
||
|
||
Bcb - Supplies a pointer to a previously pinned Bcb
|
||
|
||
Return Value:
|
||
|
||
None.
|
||
|
||
--*/
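//
//  Illustrative sketch only (not part of this module): a write-through
//  path might repin a dirty Bcb so the buffer survives until request
//  completion, where it is written and unpinned in one step.
//
//      IO_STATUS_BLOCK IoStatus;
//
//      CcSetDirtyPinnedData( Bcb, NULL );
//      CcRepinBcb( Bcb );
//
//      //  ... finish processing and release other resources ...
//
//      CcUnpinRepinnedBcb( Bcb, TRUE, &IoStatus );     //  write through
//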
|
||
|
||
{
|
||
KLOCK_QUEUE_HANDLE LockHandle;
|
||
|
||
KeAcquireInStackQueuedSpinLock( &((PBCB)Bcb)->SharedCacheMap->BcbSpinLock, &LockHandle );
|
||
|
||
((PBCB)Bcb)->PinCount += 1;
|
||
|
||
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
||
}
|
||
|
||
|
||
VOID
|
||
CcUnpinRepinnedBcb (
|
||
IN PVOID Bcb,
|
||
IN BOOLEAN WriteThrough,
|
||
OUT PIO_STATUS_BLOCK IoStatus
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine may be called to Write a previously pinned buffer
|
||
through to the file. It must have been preceded by a call to
|
||
CcRepinBcb. As this routine must acquire the Bcb
|
||
resource exclusive, the caller must be extremely careful to avoid
|
||
deadlocks. Ideally the caller owns no resources at all when it
|
||
calls this routine, or else the caller should guarantee that it
|
||
has nothing else pinned in this same file. (The latter rule is
|
||
the one used to avoid deadlocks in calls from CcCopyWrite and
|
||
CcMdlWrite.)
|
||
|
||
Arguments:
|
||
|
||
Bcb - Pointer to a Bcb which was previously specified in a call
|
||
to CcRepinBcb.
|
||
|
||
WriteThrough - TRUE if the Bcb should be written through.
|
||
|
||
IoStatus - Returns the I/O status for the operation.
|
||
|
||
Return Value:
|
||
|
||
None.
|
||
|
||
--*/
|
||
|
||
{
|
||
PSHARED_CACHE_MAP SharedCacheMap = ((PBCB)Bcb)->SharedCacheMap;
|
||
|
||
DebugTrace(+1, me, "CcUnpinRepinnedBcb\n", 0 );
|
||
DebugTrace( 0, me, " Bcb = %08lx\n", Bcb );
|
||
DebugTrace( 0, me, " WriteThrough = %02lx\n", WriteThrough );
|
||
|
||
//
|
||
// Set status to success for non write through case.
|
||
//
|
||
|
||
IoStatus->Status = STATUS_SUCCESS;
|
||
|
||
if (WriteThrough) {
|
||
|
||
//
|
||
// Acquire Bcb exclusive to eliminate possible modifiers of the buffer,
|
||
// since we are about to write its buffer.
|
||
//
|
||
|
||
if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
|
||
ExAcquireResourceExclusiveLite( &((PBCB)Bcb)->Resource, TRUE );
|
||
}
|
||
|
||
//
|
||
// Now, there is a chance that the LazyWriter has already written
|
||
// it, since the resource was free. We will only write it if it
|
||
// is still dirty.
|
||
//
|
||
|
||
if (((PBCB)Bcb)->Dirty) {
|
||
|
||
//
|
||
// First we make sure that the dirty bit in the PFN database is set.
|
||
//
|
||
|
||
ASSERT( ((PBCB)Bcb)->BaseAddress != NULL );
|
||
MmSetAddressRangeModified( ((PBCB)Bcb)->BaseAddress,
|
||
((PBCB)Bcb)->ByteLength );
|
||
|
||
//
|
||
// Now release the Bcb resource and set it clean. Note we do not check
|
||
// here for errors, and just return the I/O status. Errors on writes
|
||
// are rare to begin with. Nonetheless, our strategy is to rely on
|
||
// one or more of the following (depending on the file system) to prevent
|
||
// errors from getting to us.
|
||
//
|
||
// - Retries and/or other forms of error recovery in the disk driver
|
||
// - Mirroring driver
|
||
// - Hot fixing in the noncached path of the file system
|
||
//
|
||
// In the unexpected case that a write error does get through, we
|
||
// report it to our caller, but go ahead and set the Bcb clean. There
|
||
// seems to be no point in letting Bcbs (and pages in physical memory)
|
||
// accumulate which can never go away because we get an unrecoverable I/O
|
||
// error.
|
||
//
|
||
|
||
//
|
||
// We specify TRUE here for ReadOnly so that we will keep the
|
||
// resource during the flush.
|
||
//
|
||
|
||
CcUnpinFileData( (PBCB)Bcb, TRUE, SET_CLEAN );
|
||
|
||
//
|
||
// Write it out.
|
||
//
|
||
|
||
MmFlushSection( ((PBCB)Bcb)->SharedCacheMap->FileObject->SectionObjectPointer,
|
||
&((PBCB)Bcb)->FileOffset,
|
||
((PBCB)Bcb)->ByteLength,
|
||
IoStatus,
|
||
TRUE );
|
||
|
||
//
|
||
// If we got verify required, we have to mark the buffer dirty again
|
||
// so we will try again later.
|
||
//
|
||
|
||
if (RetryError(IoStatus->Status)) {
|
||
CcSetDirtyPinnedData( (PBCB)Bcb, NULL );
|
||
}
|
||
|
||
//
|
||
// Now remove the final pin count now that we have set it clean.
|
||
//
|
||
|
||
CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN );
|
||
|
||
//
|
||
// See if there are any deferred writes we can post.
|
||
//
|
||
|
||
if (!IsListEmpty(&CcDeferredWrites)) {
|
||
CcPostDeferredWrites();
|
||
}
|
||
}
|
||
else {
|
||
|
||
//
|
||
// Lazy Writer got there first, just free the resource and unpin.
|
||
//
|
||
|
||
CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN );
|
||
|
||
}
|
||
|
||
DebugTrace2(0, me, " <IoStatus = %08lx, %08lx\n", IoStatus->Status,
|
||
IoStatus->Information );
|
||
}
|
||
|
||
//
|
||
// Non-WriteThrough case
|
||
//
|
||
|
||
else {
|
||
|
||
CcUnpinFileData( (PBCB)Bcb, TRUE, UNPIN );
|
||
|
||
//
|
||
// Set status to success for non write through case.
|
||
//
|
||
|
||
IoStatus->Status = STATUS_SUCCESS;
|
||
}
|
||
|
||
DebugTrace(-1, me, "CcUnpinRepinnedBcb -> VOID\n", 0 );
|
||
}
|
||
|
||
|
||
//
|
||
// Internal Support Routine
|
||
//
|
||
|
||
BOOLEAN
|
||
CcFindBcb (
|
||
IN PSHARED_CACHE_MAP SharedCacheMap,
|
||
IN PLARGE_INTEGER FileOffset,
|
||
IN OUT PLARGE_INTEGER BeyondLastByte,
|
||
OUT PBCB *Bcb
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine is called to find a Bcb describing the specified byte range
|
||
of a file. It returns TRUE if it could at least find a Bcb which describes
|
||
the beginning of the specified byte range, or else FALSE if the first
|
||
part of the byte range is not present. In the latter case, the requested
|
||
byte range (TrialLength) is truncated if there is currently a Bcb which
|
||
describes bytes beyond the beginning of the byte range.
|
||
|
||
The caller may see if the entire byte range is being returned by examining
|
||
the Bcb, and the caller (or caller's caller) may then make subsequent
|
||
calls if the data is not all returned.
|
||
|
||
The BcbSpinLock must be currently acquired.
|
||
|
||
Arguments:
|
||
|
||
SharedCacheMap - Supplies a pointer to the SharedCacheMap for the file
|
||
in which the byte range is desired.
|
||
|
||
FileOffset - Supplies the file offset for the beginning of the desired
|
||
byte range.
|
||
|
||
BeyondLastByte - Supplies the file offset of the ending of the desired
|
||
byte range + 1. Note that this offset will be truncated
|
||
on return if the Bcb was not found, but bytes beyond the
|
||
beginning of the Bcb are contained in another Bcb.
|
||
|
||
Bcb - returns a Bcb describing the beginning of the byte range if also
|
||
returning TRUE, or else the point in the Bcb list to insert after.
|
||
|
||
Return Value:
|
||
|
||
FALSE - if no Bcb describes the beginning of the desired byte range
|
||
|
||
TRUE - if a Bcb is being returned describing at least an initial
|
||
part of the byte range.
|
||
|
||
--*/
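//
//  Worked example (illustrative): suppose the caller asks for the range
//  [0x1000, 0x3000) and the only Bcb on the list describes [0x2000, 0x4000).
//  No Bcb contains the first byte of the request, so FALSE is returned and
//  *Bcb is the insertion point; however, BeyondLastByte is truncated to
//  0x2000, so the caller only builds a new Bcb for [0x1000, 0x2000) and
//  picks up the existing Bcb on a subsequent call.
//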
|
||
|
||
{
|
||
PLIST_ENTRY BcbList;
|
||
PBCB Bcbt;
|
||
BOOLEAN Found = FALSE;
|
||
|
||
DebugTrace(+1, me, "CcFindBcb:\n", 0 );
|
||
DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap );
|
||
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
||
FileOffset->HighPart );
|
||
DebugTrace2(0, me, " TrialLength = %08lx, %08lx\n", TrialLength->LowPart,
|
||
TrialLength->HighPart );
|
||
|
||
//
|
||
// We want to terminate scans by testing the NodeTypeCode field from the
|
||
// BcbLinks, so we want to see the SharedCacheMap signature from the same
|
||
// offset.
|
||
//
|
||
|
||
ASSERT(FIELD_OFFSET(SHARED_CACHE_MAP, BcbList) == FIELD_OFFSET(BCB, BcbLinks));
|
||
|
||
//
|
||
// Similarly, when we hit one of the BcbListHeads in the array, small negative
|
||
// offsets are all structure pointers, so we are counting on the Bcb signature
|
||
// to have some non-Ulong address bits set.
|
||
//
|
||
|
||
ASSERT((CACHE_NTC_BCB & 3) != 0);
|
||
|
||
//
|
||
// Get address of Bcb listhead that is *after* the Bcb we are looking for,
|
||
// for backwards scan. It is important that we fail in the forward
|
||
// direction so that we are looking in the right segment of the Bcb list.
|
||
//
|
||
|
||
BcbList = GetBcbListHead( SharedCacheMap, FileOffset->QuadPart + SIZE_PER_BCB_LIST, TRUE );
|
||
|
||
//
|
||
// Search for an entry that overlaps the specified range, or until we hit
|
||
// a listhead.
|
||
//
|
||
|
||
Bcbt = CONTAINING_RECORD(BcbList->Flink, BCB, BcbLinks);
|
||
|
||
//
|
||
// First see if we really have to do Large arithmetic or not, and
|
||
// then use either a 32-bit loop or a 64-bit loop to search for
|
||
// the Bcb.
|
||
//
|
||
|
||
if (FileOffset->HighPart == 0 &&
|
||
Bcbt->NodeTypeCode == CACHE_NTC_BCB &&
|
||
Bcbt->BeyondLastByte.HighPart == 0) {
|
||
|
||
//
|
||
// 32-bit - loop until we get back to a listhead.
|
||
//
|
||
|
||
while (Bcbt->NodeTypeCode == CACHE_NTC_BCB) {
|
||
|
||
//
|
||
// Since the Bcb list is in descending order, we first check
|
||
// if we are completely beyond the current entry, and if so
|
||
// get out.
|
||
//
|
||
|
||
if (FileOffset->LowPart >= Bcbt->BeyondLastByte.LowPart) {
|
||
break;
|
||
}
|
||
|
||
//
|
||
// Next check if the first byte we are looking for is
|
||
// contained in the current Bcb. If so, we either have
|
||
// a partial hit and must truncate to the exact amount
|
||
// we have found, or we may have a complete hit. In
|
||
// either case we break with Found == TRUE.
|
||
//
|
||
|
||
if (FileOffset->LowPart >= Bcbt->FileOffset.LowPart) {
|
||
Found = TRUE;
|
||
break;
|
||
}
|
||
|
||
//
|
||
// Now we know we must loop back and keep looking, but we
|
||
// still must check for the case where the tail end of the
|
||
// bytes we are looking for are described by the current
|
||
// Bcb. If so we must truncate what we are looking for,
|
||
// because this routine is only supposed to return bytes
|
||
// from the start of the desired range.
|
||
//
|
||
|
||
if (BeyondLastByte->LowPart >= Bcbt->FileOffset.LowPart) {
|
||
BeyondLastByte->LowPart = Bcbt->FileOffset.LowPart;
|
||
}
|
||
|
||
//
|
||
// Advance to next entry in list (which is possibly back to
|
||
// the listhead) and loop back.
|
||
//
|
||
|
||
Bcbt = CONTAINING_RECORD( Bcbt->BcbLinks.Flink,
|
||
BCB,
|
||
BcbLinks );
|
||
|
||
}
|
||
|
||
} else {
|
||
|
||
//
|
||
// 64-bit - Loop until we get back to a listhead.
|
||
//
|
||
|
||
while (Bcbt->NodeTypeCode == CACHE_NTC_BCB) {
|
||
|
||
//
|
||
// Since the Bcb list is in descending order, we first check
|
||
// if we are completely beyond the current entry, and if so
|
||
// get out.
|
||
//
|
||
|
||
if (FileOffset->QuadPart >= Bcbt->BeyondLastByte.QuadPart) {
|
||
break;
|
||
}
|
||
|
||
//
|
||
// Next check if the first byte we are looking for is
|
||
// contained in the current Bcb. If so, we either have
|
||
// a partial hit and must truncate to the exact amount
|
||
// we have found, or we may have a complete hit. In
|
||
// either case we break with Found == TRUE.
|
||
//
|
||
|
||
if (FileOffset->QuadPart >= Bcbt->FileOffset.QuadPart) {
|
||
Found = TRUE;
|
||
break;
|
||
}
|
||
|
||
//
|
||
// Now we know we must loop back and keep looking, but we
|
||
// still must check for the case where the tail end of the
|
||
// bytes we are looking for are described by the current
|
||
// Bcb. If so we must truncate what we are looking for,
|
||
// because this routine is only supposed to return bytes
|
||
// from the start of the desired range.
|
||
//
|
||
|
||
if (BeyondLastByte->QuadPart >= Bcbt->FileOffset.QuadPart) {
|
||
BeyondLastByte->QuadPart = Bcbt->FileOffset.QuadPart;
|
||
}
|
||
|
||
//
|
||
// Advance to next entry in list (which is possibly back to
|
||
// the listhead) and loop back.
|
||
//
|
||
|
||
Bcbt = CONTAINING_RECORD( Bcbt->BcbLinks.Flink,
|
||
BCB,
|
||
BcbLinks );
|
||
|
||
}
|
||
}
|
||
|
||
*Bcb = Bcbt;
|
||
|
||
DebugTrace2(0, me, " <TrialLength = %08lx, %08lx\n", TrialLength->LowPart,
|
||
TrialLength->HighPart );
|
||
DebugTrace( 0, me, " <Bcb = %08lx\n", *Bcb );
|
||
DebugTrace(-1, me, "CcFindBcb -> %02lx\n", Found );
|
||
|
||
return Found;
|
||
}
|
||
|
||
|
||
//
|
||
// Internal Support Routine
|
||
//
|
||
|
||
PBCB
|
||
CcAllocateInitializeBcb (
|
||
IN OUT PSHARED_CACHE_MAP SharedCacheMap OPTIONAL,
|
||
IN OUT PBCB AfterBcb,
|
||
IN PLARGE_INTEGER FileOffset,
|
||
IN PLARGE_INTEGER TrialLength
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine allocates and initializes a Bcb to describe the specified
|
||
byte range, and inserts it into the Bcb List of the specified Shared
|
||
Cache Map. The Bcb List spin lock must currently be acquired.
|
||
|
||
BcbSpinLock must be acquired on entry.
|
||
|
||
Arguments:
|
||
|
||
SharedCacheMap - Supplies the SharedCacheMap for the new Bcb.
|
||
|
||
AfterBcb - Supplies where in the descending-order BcbList the new Bcb
|
||
should be inserted: either the ListHead (masquerading as
|
||
a Bcb) or a Bcb.
|
||
|
||
FileOffset - Supplies File Offset for the desired data.
|
||
|
||
TrialLength - Supplies length of desired data.
|
||
|
||
Return Value:
|
||
|
||
Address of the allocated and initialized Bcb
|
||
|
||
--*/
|
||
|
||
{
|
||
PBCB Bcb;
|
||
ULONG RoundedBcbSize = (sizeof(BCB) + 7) & ~7;
|
||
|
||
if ((Bcb = ExAllocatePoolWithTag( NonPagedPool, sizeof(BCB), 'cBcC')) == NULL) {
|
||
|
||
return NULL;
|
||
}
|
||
|
||
//
|
||
// Initialize the newly allocated Bcb. First zero it, then fill in
|
||
// nonzero fields.
|
||
//
|
||
|
||
RtlZeroMemory( Bcb, RoundedBcbSize );
|
||
|
||
//
|
||
// For Mbcb's, SharedCacheMap is NULL, and the rest of this initialization
|
||
// is not desired.
|
||
//
|
||
|
||
if (SharedCacheMap != NULL) {
|
||
|
||
Bcb->NodeTypeCode = CACHE_NTC_BCB;
|
||
Bcb->FileOffset = *FileOffset;
|
||
Bcb->ByteLength = TrialLength->LowPart;
|
||
Bcb->BeyondLastByte.QuadPart = FileOffset->QuadPart + TrialLength->QuadPart;
|
||
Bcb->PinCount += 1;
|
||
ExInitializeResourceLite( &Bcb->Resource );
|
||
Bcb->SharedCacheMap = SharedCacheMap;
|
||
|
||
//
|
||
// Since CcCalculateVacbLockCount has to be able to walk
|
||
// the BcbList with only the VacbSpinLock, we take that one
|
||
// out to change the list and set the count.
|
||
//
|
||
|
||
CcAcquireVacbLockAtDpcLevel();
|
||
InsertTailList( &AfterBcb->BcbLinks, &Bcb->BcbLinks );
|
||
|
||
ASSERT( (SharedCacheMap->SectionSize.QuadPart < VACB_SIZE_OF_FIRST_LEVEL) ||
|
||
(CcFindBcb(SharedCacheMap, FileOffset, &Bcb->BeyondLastByte, &AfterBcb) &&
|
||
(Bcb == AfterBcb)) );
|
||
|
||
//
|
||
// Now for large metadata streams we lock the Vacb level.
|
||
//
|
||
|
||
CcLockVacbLevel( SharedCacheMap, FileOffset->QuadPart );
|
||
CcReleaseVacbLockFromDpcLevel();
|
||
|
||
//
|
||
// If write behind is disabled for this stream, let Ex know that the
|
||
// resource will never be acquired exclusive. Also disable
|
||
// boost (I know this is useless, but KenR said I had to do it).
|
||
//
|
||
|
||
if (SharedCacheMap &&
|
||
FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) {
|
||
#if DBG
|
||
SetFlag(Bcb->Resource.Flag, ResourceNeverExclusive);
|
||
#endif
|
||
ExDisableResourceBoost( &Bcb->Resource );
|
||
}
|
||
}
|
||
|
||
return Bcb;
|
||
}
|
||
|
||
|
||
//
|
||
// Internal support routine
|
||
//
|
||
|
||
VOID
|
||
FASTCALL
|
||
CcDeallocateBcb (
|
||
IN PBCB Bcb
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine deallocates a Bcb to the BcbZone. It must
|
||
already be removed from the BcbList.
|
||
|
||
Arguments:
|
||
|
||
Bcb - the Bcb to deallocate
|
||
|
||
Return Value:
|
||
|
||
None
|
||
|
||
--*/
|
||
|
||
{
|
||
//
|
||
// Deallocate Resource structures
|
||
//
|
||
|
||
if (Bcb->NodeTypeCode == CACHE_NTC_BCB) {
|
||
|
||
ExDeleteResourceLite( &Bcb->Resource );
|
||
}
|
||
|
||
ExFreePool(Bcb);
|
||
return;
|
||
}
|
||
|
||
|
||
//
|
||
// Internal Support Routine
|
||
//
|
||
|
||
BOOLEAN
|
||
CcMapAndRead(
|
||
IN PSHARED_CACHE_MAP SharedCacheMap,
|
||
IN PLARGE_INTEGER FileOffset,
|
||
IN ULONG Length,
|
||
IN ULONG ZeroFlags,
|
||
IN BOOLEAN Wait,
|
||
IN PVOID BaseAddress
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine may be called to insure that the specified data is mapped
and read into memory.  If TRUE is returned, the data is resident and may
be accessed at the supplied system-space base address.
|
||
|
||
Arguments:
|
||
|
||
SharedCacheMap - Supplies the address of the SharedCacheMap for the
|
||
data.
|
||
|
||
FileOffset - Supplies the file offset of the desired data.
|
||
|
||
Length - Supplies the total amount of data desired.
|
||
|
||
ZeroFlags - Defines which pages may be zeroed if not resident.
|
||
|
||
Wait - Supplies FALSE if the caller is not willing to block for the
|
||
data, or TRUE if the caller is willing to block.
|
||
|
||
BaseAddress - Supplies the system base address at which the data may
|
||
be accessed.
|
||
|
||
Return Value:
|
||
|
||
FALSE - if the caller supplied Wait = FALSE and the data could not
|
||
be returned without blocking.
|
||
|
||
TRUE - if the data is being returned.
|
||
|
||
Note: this routine may raise an exception due to a map or read failure,
|
||
however, this can only happen if Wait was specified as TRUE, since
|
||
mapping and reading will not be performed if the caller cannot wait.
|
||
|
||
--*/
|
||
|
||
{
|
||
ULONG ZeroCase;
|
||
ULONG SavedState;
|
||
BOOLEAN Result = FALSE;
|
||
PETHREAD Thread = PsGetCurrentThread();
|
||
|
||
UNREFERENCED_PARAMETER (SharedCacheMap);
|
||
UNREFERENCED_PARAMETER (FileOffset);
|
||
|
||
MmSavePageFaultReadAhead( Thread, &SavedState );
|
||
|
||
//
|
||
// try around everything for cleanup.
|
||
//
|
||
|
||
try {
|
||
|
||
ULONG PagesToGo;
|
||
|
||
//
|
||
// Now loop to touch all of the pages, calling MM to insure
|
||
// that if we fault, we take in exactly the number of pages
|
||
// we need.
|
||
//
|
||
|
||
PagesToGo = ADDRESS_AND_SIZE_TO_SPAN_PAGES( BaseAddress, Length );
|
||
|
||
//
|
||
// Loop to touch or zero the pages.
|
||
//
|
||
|
||
ZeroCase = ZERO_FIRST_PAGE;
|
||
|
||
while (PagesToGo) {
|
||
|
||
//
|
||
// If we cannot zero this page, or Mm failed to return
|
||
// a zeroed page, then just fault it in.
|
||
//
|
||
|
||
MmSetPageFaultReadAhead( Thread, (PagesToGo - 1) );
|
||
|
||
if (!FlagOn(ZeroFlags, ZeroCase) ||
|
||
!MmCheckCachedPageState(BaseAddress, TRUE)) {
|
||
|
||
//
|
||
// If we get here, it is almost certainly due to the fact
|
||
// that we can not take a zero page. MmCheckCachedPageState
|
||
// will so rarely return FALSE, that we will not worry
|
||
// about it. We will only check if the page is there if
|
||
// Wait is FALSE, so that we can do the right thing.
|
||
//
|
||
|
||
if (!MmCheckCachedPageState(BaseAddress, FALSE) && !Wait) {
|
||
try_return( Result = FALSE );
|
||
}
|
||
}
|
||
|
||
BaseAddress = (PCHAR)BaseAddress + PAGE_SIZE;
|
||
PagesToGo -= 1;
|
||
|
||
if (PagesToGo == 1) {
|
||
ZeroCase = ZERO_LAST_PAGE;
|
||
} else {
|
||
ZeroCase = ZERO_MIDDLE_PAGES;
|
||
}
|
||
}
|
||
|
||
try_return( Result = TRUE );
|
||
|
||
try_exit: NOTHING;
|
||
}
|
||
|
||
//
|
||
// Cleanup on the way out.
|
||
//
|
||
|
||
finally {
|
||
|
||
MmResetPageFaultReadAhead(Thread, SavedState);
|
||
}
|
||
|
||
return Result;
|
||
}
|
||
|
||
|
||
//
|
||
// Internal Support Routine
|
||
//
|
||
|
||
VOID
|
||
CcFreeActiveVacb (
|
||
IN PSHARED_CACHE_MAP SharedCacheMap,
|
||
IN PVACB ActiveVacb OPTIONAL,
|
||
IN ULONG ActivePage,
|
||
IN ULONG PageIsDirty
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine may be called to zero the end of a locked page or
|
||
free the ActiveVacb for a Shared Cache Map, if there is one.
|
||
Note that some callers are not synchronized with foreground
|
||
activity, and may therefore not have an ActiveVacb. Examples
|
||
of unsynchronized callers are CcZeroEndOfLastPage (which is
|
||
called by MM) and any flushing done by CcWriteBehind.
|
||
|
||
Arguments:
|
||
|
||
SharedCacheMap - SharedCacheMap to examine for page to be zeroed.
|
||
|
||
ActiveVacb - Vacb to free
|
||
|
||
ActivePage - Page that was used
|
||
|
||
PageIsDirty - ACTIVE_PAGE_IS_DIRTY if the active page is dirty
|
||
|
||
Return Value:
|
||
|
||
None
|
||
|
||
--*/
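//
//  Illustrative sketch only: an unsynchronized caller that merely needs any
//  pending end-of-page zeroing completed (and has no active Vacb of its own)
//  can call with a NULL Vacb, as CcMapAndCopy does later in this module.
//
//      if (SharedCacheMap->NeedToZero != NULL) {
//
//          CcFreeActiveVacb( SharedCacheMap, NULL, 0, 0 );
//      }
//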
|
||
|
||
{
|
||
LARGE_INTEGER ActiveOffset;
|
||
PVOID ActiveAddress;
|
||
ULONG BytesLeftInPage;
|
||
KIRQL OldIrql;
|
||
|
||
//
|
||
// If the page was locked, then unlock it.
|
||
//
|
||
|
||
if (SharedCacheMap->NeedToZero != NULL) {
|
||
|
||
PVACB NeedToZeroVacb;
|
||
|
||
//
|
||
// Zero the rest of the page under spinlock control,
|
||
// and then clear the address field. This field makes
|
||
// zero->nonzero transitions only when the file is exclusive,
|
||
// but it can make nonzero->zero transitions any time the
|
||
// spinlock is not held.
|
||
//
|
||
|
||
ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql );
|
||
|
||
//
|
||
// The address could already be gone.
|
||
//
|
||
|
||
ActiveAddress = SharedCacheMap->NeedToZero;
|
||
if (ActiveAddress != NULL) {
|
||
|
||
BytesLeftInPage = PAGE_SIZE - ((((ULONG)((ULONG_PTR)ActiveAddress) - 1) & (PAGE_SIZE - 1)) + 1);
|
||
|
||
RtlZeroBytes( ActiveAddress, BytesLeftInPage );
|
||
NeedToZeroVacb = SharedCacheMap->NeedToZeroVacb;
|
||
ASSERT( NeedToZeroVacb != NULL );
|
||
SharedCacheMap->NeedToZero = NULL;
|
||
|
||
}
|
||
ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql );
|
||
|
||
//
|
||
// Now call MM to unlock the address. Note we will never store the
|
||
// address at the start of the page, but we can sometimes store
|
||
// the start of the next page when we have exactly filled the page.
|
||
//
|
||
|
||
if (ActiveAddress != NULL) {
|
||
MmUnlockCachedPage( (PVOID)((PCHAR)ActiveAddress - 1) );
|
||
CcFreeVirtualAddress( NeedToZeroVacb );
|
||
}
|
||
}
|
||
|
||
//
|
||
// See if caller actually has an ActiveVacb
|
||
//
|
||
|
||
if (ActiveVacb != NULL) {
|
||
|
||
//
|
||
// See if the page is dirty
|
||
//
|
||
|
||
if (PageIsDirty) {
|
||
|
||
ActiveOffset.QuadPart = (LONGLONG)ActivePage << PAGE_SHIFT;
|
||
ActiveAddress = (PVOID)((PCHAR)ActiveVacb->BaseAddress +
|
||
(ActiveOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1)));
|
||
|
||
//
|
||
// Tell the Lazy Writer to write the page.
|
||
//
|
||
|
||
CcSetDirtyInMask( SharedCacheMap, &ActiveOffset, PAGE_SIZE );
|
||
|
||
//
|
||
// Now we need to clear the flag and decrement some counts if there is
|
||
// no other active Vacb which snuck in.
|
||
//
|
||
|
||
CcAcquireMasterLock( &OldIrql );
|
||
ExAcquireSpinLockAtDpcLevel( &SharedCacheMap->ActiveVacbSpinLock );
|
||
if ((SharedCacheMap->ActiveVacb == NULL) &&
|
||
FlagOn(SharedCacheMap->Flags, ACTIVE_PAGE_IS_DIRTY)) {
|
||
|
||
ClearFlag(SharedCacheMap->Flags, ACTIVE_PAGE_IS_DIRTY);
|
||
CcDeductDirtyPages( SharedCacheMap, 1);
|
||
}
|
||
ExReleaseSpinLockFromDpcLevel( &SharedCacheMap->ActiveVacbSpinLock );
|
||
CcReleaseMasterLock( OldIrql );
|
||
}
|
||
|
||
//
|
||
// Now free the Vacb.
|
||
//
|
||
|
||
CcFreeVirtualAddress( ActiveVacb );
|
||
}
|
||
}
|
||
|
||
|
||
//
|
||
// Internal Support Routine
|
||
//
|
||
|
||
VOID
|
||
CcMapAndCopy(
|
||
IN PSHARED_CACHE_MAP SharedCacheMap,
|
||
IN PVOID UserBuffer,
|
||
IN PLARGE_INTEGER FileOffset,
|
||
IN ULONG Length,
|
||
IN ULONG ZeroFlags,
|
||
IN PFILE_OBJECT FileObject
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine may be called to copy the specified user data to the
|
||
cache via a special Mm routine which copies the data to uninitialized
|
||
pages and returns.
|
||
|
||
Arguments:
|
||
|
||
SharedCacheMap - Supplies the address of the SharedCacheMap for the
|
||
data.
|
||
|
||
UserBuffer - unsafe buffer supplying the user's data to be written
|
||
|
||
FileOffset - Supplies the file offset to be modified
|
||
|
||
Length - Supplies the total amount of data
|
||
|
||
ZeroFlags - Defines which pages may be zeroed if not resident.
|
||
|
||
FileObject - Supplies the file object being written to
|
||
|
||
Return Value:
|
||
|
||
None
|
||
|
||
--*/
|
||
|
||
{
|
||
ULONG ReceivedLength;
|
||
ULONG ZeroCase;
|
||
PVOID CacheBuffer;
|
||
PVOID SavedMappedBuffer;
|
||
ULONG SavedMappedLength;
|
||
ULONG ActivePage;
|
||
KIRQL OldIrql;
|
||
LARGE_INTEGER PFileOffset;
|
||
IO_STATUS_BLOCK IoStatus;
|
||
NTSTATUS Status;
|
||
ULONG SavedState;
|
||
LOGICAL MorePages;
|
||
BOOLEAN WriteThrough = BooleanFlagOn( FileObject->Flags, FO_WRITE_THROUGH );
|
||
ULONG SavedTotalLength = Length;
|
||
LARGE_INTEGER LocalOffset;
|
||
ULONG PageOffset = FileOffset->LowPart & (PAGE_SIZE - 1);
|
||
PVACB Vacb = NULL;
|
||
PETHREAD Thread = PsGetCurrentThread();
|
||
|
||
//
|
||
// Initialize SavePage to TRUE to skip the finally clause on zero-length
|
||
// writes.
|
||
//
|
||
|
||
BOOLEAN SavePage = TRUE;
|
||
|
||
//
|
||
// PREfix needs to see this explicitly, as opposed to a structure copy.
|
||
//
|
||
|
||
LocalOffset.QuadPart = FileOffset->QuadPart;
|
||
|
||
DebugTrace(+1, me, "CcMapAndCopy:\n", 0 );
|
||
DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap );
|
||
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
||
FileOffset->HighPart );
|
||
DebugTrace( 0, me, " Length = %08lx\n", Length );
|
||
|
||
MmSavePageFaultReadAhead( Thread, &SavedState );
|
||
|
||
//
|
||
// BUGBUG: re-enable this path when we can also generate a ccsetvaliddata call
|
||
// in all cases to fix corruption issue (see 615074)
|
||
//
|
||
|
||
#if 0
|
||
|
||
//
|
||
// See if we need to force write through. If the file object is of remote origin,
|
||
// it has been exempted from throttling. As a result, it is possible that too
|
||
// many pages will get dirty. In order to prevent this, we force write through
|
||
// on these file objects if we would have throttled them in the first place.
|
||
//
|
||
|
||
if (!WriteThrough && IoIsFileOriginRemote(FileObject)
|
||
|
||
&&
|
||
|
||
!CcCanIWrite( FileObject,
|
||
Length,
|
||
FALSE,
|
||
MAXUCHAR - 2 )) {
|
||
|
||
WriteThrough = TRUE;
|
||
}
|
||
|
||
#endif
|
||
|
||
//
|
||
// try around everything for cleanup.
|
||
//
|
||
|
||
try {
|
||
|
||
while (Length != 0) {
|
||
|
||
CacheBuffer = CcGetVirtualAddress( SharedCacheMap,
|
||
LocalOffset,
|
||
&Vacb,
|
||
&ReceivedLength );
|
||
|
||
//
|
||
// PREfix wants to know this cannot be NULL, otherwise it
|
||
// will complain.
|
||
//
|
||
|
||
ASSERT( CacheBuffer != NULL );
|
||
|
||
//
|
||
// If we got more than we need, make sure to only use
|
||
// the right amount.
|
||
//
|
||
|
||
if (ReceivedLength > Length) {
|
||
ReceivedLength = Length;
|
||
}
|
||
SavedMappedBuffer = CacheBuffer;
|
||
SavedMappedLength = ReceivedLength;
|
||
Length -= ReceivedLength;
|
||
|
||
//
|
||
// Now loop to touch all of the pages, calling MM to insure
|
||
// that if we fault, we take in exactly the number of pages
|
||
// we need.
|
||
//
|
||
|
||
CacheBuffer = (PVOID)((PCHAR)CacheBuffer - PageOffset);
|
||
ReceivedLength += PageOffset;
|
||
|
||
//
|
||
// Loop to touch or zero the pages.
|
||
//
|
||
|
||
ZeroCase = ZERO_FIRST_PAGE;
|
||
|
||
//
|
||
// Set up offset to page for use below.
|
||
//
|
||
|
||
PFileOffset = LocalOffset;
|
||
PFileOffset.LowPart -= PageOffset;
|
||
|
||
while (TRUE) {
|
||
|
||
//
|
||
// Calculate whether we wish to save an active page
|
||
// or not.
|
||
//
|
||
|
||
SavePage = (BOOLEAN) ((Length == 0) &&
|
||
(ReceivedLength < PAGE_SIZE) &&
|
||
(SavedTotalLength <= (PAGE_SIZE / 2)) &&
|
||
!WriteThrough);
|
||
|
||
MorePages = (ReceivedLength > PAGE_SIZE);
|
||
|
||
//
|
||
// Copy the data to the user buffer.
|
||
//
|
||
|
||
try {
|
||
|
||
//
|
||
// It is possible that there is a locked page
|
||
// hanging around, and so we need to nuke it here.
|
||
//
|
||
|
||
if (SharedCacheMap->NeedToZero != NULL) {
|
||
CcFreeActiveVacb( SharedCacheMap, NULL, 0, 0 );
|
||
}
|
||
|
||
Status = STATUS_SUCCESS;
|
||
if (FlagOn(ZeroFlags, ZeroCase)) {
|
||
|
||
Status = MmCopyToCachedPage( CacheBuffer,
|
||
UserBuffer,
|
||
PageOffset,
|
||
MorePages ?
|
||
(PAGE_SIZE - PageOffset) :
|
||
(ReceivedLength - PageOffset),
|
||
SavePage );
|
||
|
||
if (!NT_SUCCESS(Status)) {
|
||
|
||
ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
|
||
STATUS_INVALID_USER_BUFFER ));
|
||
}
|
||
|
||
//
|
||
// Otherwise, we have to actually copy the data ourselves.
|
||
//
|
||
|
||
} else {
|
||
|
||
MmSetPageFaultReadAhead( Thread,
|
||
(MorePages && FlagOn(ZeroFlags, ZERO_LAST_PAGE)) ? 1 : 0);
|
||
|
||
RtlCopyBytes( (PVOID)((PCHAR)CacheBuffer + PageOffset),
|
||
UserBuffer,
|
||
MorePages ?
|
||
(PAGE_SIZE - PageOffset) :
|
||
(ReceivedLength - PageOffset) );
|
||
|
||
MmResetPageFaultReadAhead( Thread, SavedState );
|
||
|
||
}
|
||
|
||
} except( CcCopyReadExceptionFilter( GetExceptionInformation(),
|
||
&Status ) ) {
|
||
|
||
//
|
||
// If we got an access violation, then the user buffer went
|
||
// away. Otherwise we must have gotten an I/O error trying
|
||
// to bring the data in.
|
||
//
|
||
|
||
if (Status == STATUS_ACCESS_VIOLATION) {
|
||
ExRaiseStatus( STATUS_INVALID_USER_BUFFER );
|
||
}
|
||
else {
|
||
ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
|
||
STATUS_UNEXPECTED_IO_ERROR ));
|
||
}
|
||
}

                //
                // Now get out quickly if it is a small write and we want
                // to save the page.
                //

                if (SavePage) {

                    ActivePage = (ULONG)( Vacb->Overlay.FileOffset.QuadPart >> PAGE_SHIFT ) +
                                 (ULONG)(((PCHAR)CacheBuffer - (PCHAR)Vacb->BaseAddress) >>
                                         PAGE_SHIFT);

                    PFileOffset.LowPart += ReceivedLength;

                    //
                    // If the cache page was left locked, remember the Vacb and
                    // the address to zero from so that CcFreeActiveVacb can
                    // finish zeroing the page and unlock it later.
                    //

                    if (Status == STATUS_CACHE_PAGE_LOCKED) {

                        //
                        // We need to guarantee this Vacb for zeroing and calling
                        // MmUnlockCachedPage, so we increment the active count here
                        // and remember it for CcFreeActiveVacb.
                        //

                        CcAcquireVacbLock( &OldIrql );
                        Vacb->Overlay.ActiveCount += 1;

                        ExAcquireSpinLockAtDpcLevel( &SharedCacheMap->ActiveVacbSpinLock );

                        ASSERT(SharedCacheMap->NeedToZero == NULL);

                        SharedCacheMap->NeedToZero = (PVOID)((PCHAR)CacheBuffer +
                                                             (PFileOffset.LowPart & (PAGE_SIZE - 1)));
                        SharedCacheMap->NeedToZeroPage = ActivePage;
                        SharedCacheMap->NeedToZeroVacb = Vacb;

                        ExReleaseSpinLockFromDpcLevel( &SharedCacheMap->ActiveVacbSpinLock );
                        CcReleaseVacbLock( OldIrql );
                    }

                    SetActiveVacb( SharedCacheMap,
                                   OldIrql,
                                   Vacb,
                                   ActivePage,
                                   ACTIVE_PAGE_IS_DIRTY );

                    try_return( NOTHING );
                }

                //
                // If it looks like we may save a page and exit on the next loop,
                // then we must make sure to mark the current page dirty. Note
                // that Cc[Fast]CopyWrite will finish the last part of any page
                // before allowing us to free the Active Vacb above, therefore
                // this case only occurs for a small random write.
                //

                if ((SavedTotalLength <= (PAGE_SIZE / 2)) && !WriteThrough) {

                    CcSetDirtyInMask( SharedCacheMap, &PFileOffset, ReceivedLength );
                }

                UserBuffer = (PVOID)((PCHAR)UserBuffer + (PAGE_SIZE - PageOffset));
                PageOffset = 0;

                //
                // If there is more than a page to go (including what we just
                // copied), then adjust our buffer pointer and counts, and
                // determine if we are to the last page yet.
                //

                if (MorePages) {

                    CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE;
                    ReceivedLength -= PAGE_SIZE;

                    //
                    // Update our offset to the page. Note that 32-bit
                    // add is ok since we cannot cross a Vacb boundary
                    // and we reinitialize this offset before entering
                    // this loop again.
                    //

                    PFileOffset.LowPart += PAGE_SIZE;

                    if (ReceivedLength > PAGE_SIZE) {
                        ZeroCase = ZERO_MIDDLE_PAGES;
                    } else {
                        ZeroCase = ZERO_LAST_PAGE;
                    }

                } else {

                    break;
                }
            }

            //
            // If there is still more to write (ie. we are going to step
            // onto the next vacb) AND we just dirtied more than 64K, then
            // do a vicarious MmFlushSection here. This prevents us from
            // creating unlimited dirty pages while holding the file
            // resource exclusive. We also do not need to set the pages
            // dirty in the mask in this case.
            //

            if (Length > CcMaxDirtyWrite) {

                MmSetAddressRangeModified( SavedMappedBuffer, SavedMappedLength );
                MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer,
                                &LocalOffset,
                                SavedMappedLength,
                                &IoStatus,
                                TRUE );

                if (!NT_SUCCESS(IoStatus.Status)) {
                    ExRaiseStatus( FsRtlNormalizeNtstatus( IoStatus.Status,
                                                           STATUS_UNEXPECTED_IO_ERROR ));
                }

            //
            // For write through files, call Mm to propagate the dirty bits
            // here while we have the view mapped, so we know the flush will
            // work below. Again - do not set dirty in the mask.
            //

            } else if (WriteThrough) {

                MmSetAddressRangeModified( SavedMappedBuffer, SavedMappedLength );

            //
            // For the normal case, just set the pages dirty for the Lazy Writer
            // now.
            //

            } else {

                CcSetDirtyInMask( SharedCacheMap, &LocalOffset, SavedMappedLength );
            }

            CcFreeVirtualAddress( Vacb );
            Vacb = NULL;

            //
            // If we have to loop back to get at least a page, it will be ok to
            // zero the first page. If we are not getting at least a page, we
            // must make sure we clear the ZeroFlags if we cannot zero the last
            // page.
            //

            if (Length >= PAGE_SIZE) {
                ZeroFlags |= ZERO_FIRST_PAGE;
            } else if ((ZeroFlags & ZERO_LAST_PAGE) == 0) {
                ZeroFlags = 0;
            }

            //
            // Note that if ReceivedLength (and therefore SavedMappedLength)
            // was truncated to the transfer size then the new LocalOffset
            // computed below is not correct. This is not an issue since in
            // that case (Length == 0) we would never loop back to use it.
            //

            LocalOffset.QuadPart = LocalOffset.QuadPart + (LONGLONG)SavedMappedLength;
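
            //
            // LocalOffset now addresses the first byte not yet copied; if
            // Length is still nonzero, the next pass of this loop maps it
            // with a fresh Vacb.
            //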
        }

    try_exit: NOTHING;
    }

    //
    // Cleanup on the way out.
    //

    finally {

        MmResetPageFaultReadAhead( Thread, SavedState );

        //
        // We have no work to do if we have squirreled away the Vacb.
        //

        if (!SavePage || AbnormalTermination()) {

            //
            // Make sure we do not leave anything mapped or dirty in the PTE
            // on the way out.
            //

            if (Vacb != NULL) {

                CcFreeVirtualAddress( Vacb );
            }

            //
            // Either flush the whole range because of write through, or
            // mark it dirty for the lazy writer.
            //

            if (WriteThrough) {

                MmFlushSection ( SharedCacheMap->FileObject->SectionObjectPointer,
                                 FileOffset,
                                 SavedTotalLength,
                                 &IoStatus,
                                 TRUE );

                if (!NT_SUCCESS(IoStatus.Status)) {
                    ExRaiseStatus( FsRtlNormalizeNtstatus( IoStatus.Status,
                                                           STATUS_UNEXPECTED_IO_ERROR ));
                }

                //
                // Advance ValidDataGoal
                //

                LocalOffset.QuadPart = FileOffset->QuadPart + (LONGLONG)SavedTotalLength;
                if (LocalOffset.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart) {
                    SharedCacheMap->ValidDataGoal = LocalOffset;
                }
            }
        }
    }

    DebugTrace(-1, me, "CcMapAndCopy -> %02lx\n", Result );

    return;
}


BOOLEAN
CcLogError(
    IN PFILE_OBJECT FileObject,
    IN PUNICODE_STRING FileName,
    IN NTSTATUS Error,
    IN NTSTATUS DeviceError,
    IN UCHAR IrpMajorCode
    )

/*++

Routine Description:

    This routine writes an error log entry to the system event log.

Arguments:

    FileObject - The file object in whose context the error occurred.

    FileName - The filename to use in logging the error (usually the DOS-side name)

    Error - The error to log in the eventlog record

    DeviceError - The actual error that occurred in the device - will be logged
        as user data

    IrpMajorCode - The major function code of the operation that failed

Return Value:

    TRUE if successful, FALSE if internal memory allocation failed

--*/

{
    UCHAR ErrorPacketLength;
    UCHAR BasePacketLength;
    ULONG StringLength;
    PIO_ERROR_LOG_PACKET ErrorLogEntry = NULL;
    BOOLEAN Result = FALSE;
    PWCHAR String;

    PAGED_CODE();

    //
    // Get our error packet, holding the string and status code. Note we log against the
    // true filesystem if this is available.
    //
    // The sizing of the packet is a bit slimy since the dumpdata is already grown by a
    // ULONG onto the end of the packet. Since NTSTATUS is ULONG, well, we just work in
    // place.
    //

    BasePacketLength = sizeof(IO_ERROR_LOG_PACKET);
    if ((BasePacketLength + FileName->Length + sizeof(WCHAR)) <= ERROR_LOG_MAXIMUM_SIZE) {
        ErrorPacketLength = (UCHAR)(BasePacketLength + FileName->Length + sizeof(WCHAR));
    } else {
        ErrorPacketLength = ERROR_LOG_MAXIMUM_SIZE;
    }
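
    //
    // Layout of the finished packet (sketch, assuming the name fits): the
    // IO_ERROR_LOG_PACKET header with the device NTSTATUS in its DumpData,
    // immediately followed by the file name string and a terminating NUL,
    // all capped at ERROR_LOG_MAXIMUM_SIZE.
    //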

    ErrorLogEntry = (PIO_ERROR_LOG_PACKET) IoAllocateErrorLogEntry( (FileObject->Vpb ?
                                                                     FileObject->Vpb->DeviceObject :
                                                                     FileObject->DeviceObject),
                                                                    ErrorPacketLength );
    if (ErrorLogEntry) {

        //
        // Fill in the nonzero members of the packet.
        //

        ErrorLogEntry->MajorFunctionCode = IrpMajorCode;
        ErrorLogEntry->ErrorCode = Error;
        ErrorLogEntry->FinalStatus = DeviceError;

        ErrorLogEntry->DumpDataSize = sizeof(NTSTATUS);
        RtlCopyMemory( &ErrorLogEntry->DumpData, &DeviceError, sizeof(NTSTATUS) );

        //
        // The filename string is appended to the end of the error log entry. We may
        // have to smash the middle to fit it in the limited space.
        //

        StringLength = ErrorPacketLength - BasePacketLength - sizeof(WCHAR);

        ASSERT(!(StringLength % sizeof(WCHAR)));

        String = (PWCHAR) ((PUCHAR)ErrorLogEntry + BasePacketLength);
        ErrorLogEntry->NumberOfStrings = 1;
        ErrorLogEntry->StringOffset = BasePacketLength;

        //
        // If the name does not fit in the packet, divide the name equally to the
        // prefix and suffix, with an ellipsis " .. " (4 wide characters) to indicate
        // the loss.
        //

        if (StringLength < FileName->Length) {

            //
            // Remember, prefix + " .. " + suffix is the length. Calculate by figuring
            // the prefix and then get the suffix by whacking the ellipsis and prefix off
            // the total.
            //
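            // Worked example with hypothetical sizes: if StringLength is 40
            // bytes (20 WCHARs) and the name is longer than that, the prefix
            // gets 20/2 - 2 == 8 WCHARs, the " .. " ellipsis takes 4, and
            // the suffix gets the remaining 8 WCHARs.
            //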

            ULONG NamePrefixSegmentLength = ((StringLength/sizeof(WCHAR))/2 - 2)*sizeof(WCHAR);
            ULONG NameSuffixSegmentLength = StringLength - 4*sizeof(WCHAR) - NamePrefixSegmentLength;

            ASSERT(!(NamePrefixSegmentLength % sizeof(WCHAR)));
            ASSERT(!(NameSuffixSegmentLength % sizeof(WCHAR)));

            RtlCopyMemory( String,
                           FileName->Buffer,
                           NamePrefixSegmentLength );
            String = (PWCHAR)((PCHAR)String + NamePrefixSegmentLength);

            RtlCopyMemory( String,
                           L" .. ",
                           4*sizeof(WCHAR) );
            String += 4;

            RtlCopyMemory( String,
                           (PUCHAR)FileName->Buffer +
                           FileName->Length - NameSuffixSegmentLength,
                           NameSuffixSegmentLength );
            String = (PWCHAR)((PCHAR)String + NameSuffixSegmentLength);

        } else {

            RtlCopyMemory( String,
                           FileName->Buffer,
                           FileName->Length );
            String += FileName->Length/sizeof(WCHAR);
        }

        //
        // Null terminate the string and send the packet.
        //

        *String = L'\0';

        IoWriteErrorLogEntry( ErrorLogEntry );
        Result = TRUE;
    }

    return Result;
}


LOGICAL
CcHasInactiveViews (
    VOID
    )

/*++

Routine Description:

    This routine is called by Memory Management only to query if the system
    cache has any inactive views. If so, Memory Management may issue a
    subsequent call to CcUnmapInactiveViews to discard these views in an
    attempt to reclaim the prototype PTE pool (and other resources tied to
    the section).

Arguments:

    None.

Return Value:

    TRUE if Cc has any views it can discard, FALSE if not.

Environment:

    Arbitrary thread context, generally APC_LEVEL or DISPATCH_LEVEL. Various
    mutexes and/or spinlocks may be held by the caller.

--*/

{
    return FALSE;   // BUGBUG - add code to flesh out.
}


LOGICAL
CcUnmapInactiveViews (
    IN ULONG NumberOfViewsToUnmap
    )

/*++

Routine Description:

    This routine is called by Memory Management to request that the cache
    manager unmap a number of inactive views. This call is generally made
    because the system is low on pool (paged or nonpaged).

    Discarding these views is done in an attempt to reclaim the prototype
    PTE pool (and other resources tied to the section).

Arguments:

    NumberOfViewsToUnmap - Supplies the desired number of views to unmap.

Return Value:

    TRUE if Cc discarded *ANY* views, FALSE if not.

Environment:

    Dereference segment thread context at PASSIVE_LEVEL.

--*/

{
    UNREFERENCED_PARAMETER (NumberOfViewsToUnmap);

    return FALSE;   // BUGBUG - add code to flesh out.
}

#ifdef CCDBG
VOID
CcDump (
    IN PVOID Ptr
    )

{
    PVOID Junk = Ptr;
}
#endif