windows-nt/Source/XPSP1/NT/base/ntos/mm/buildmdl.c
2020-09-26 16:20:57 +08:00

1515 lines
40 KiB
C
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*++
Copyright (c) 1999 Microsoft Corporation
Module Name:
buildmdl.c
Abstract:
This module contains the Mm support routines for the cache manager to
prefetch groups of pages from secondary storage using logical file
offsets instead of virtual addresses. This saves the cache manager from
having to map pages unnecessarily.
The caller builds a list of various file objects and logical block offsets,
passing them to MmPrefetchPagesIntoLockedMdl. The code here then examines
the internal pages, reading in those that are not already valid or in
transition. These pages are read with a single read, using a dummy page
to bridge gaps of pages that were valid or transition prior to the I/O
being issued.
Upon conclusion of the I/O, control is returned to the calling thread.
All pages are reference counted as though they were probed and locked,
regardless of whether they are currently valid or transition.
Author:
Landy Wang (landyw) 12-Feb-2001
Revision History:
--*/
#include "mi.h"
#if DBG
//
// Checked-build only test hooks. MiCcDebug is a bitmask of the MI_CC_*
// values below:
//
//   MI_CC_FORCE_PREFETCH - MiCcPrepareReadInfo calls MiRemoveUserPages
//                          before walking the subsections.
//   MI_CC_DELAY          - MiCcPutPagesInTransition delays for MmHalfSecond
//                          both before and after issuing the read.
//
ULONG MiCcDebug;
#define MI_CC_FORCE_PREFETCH 0x1 // Trim all user pages to force prefetch
#define MI_CC_DELAY 0x2 // Delay hoping to trigger collisions
#endif
//
// Per-request bookkeeping for one MmPrefetchPagesIntoLockedMdl call. The
// caller zeroes the structure, fills in FileObject, FileOffset and
// LengthInBytes, and the Mi routines in this module fill in the rest.
//
typedef struct _MI_READ_INFO {
// Control area taken from FileObject->SectionObjectPointer->DataSectionObject.
PCONTROL_AREA ControlArea;
// File object supplied by the caller.
PFILE_OBJECT FileObject;
// Starting byte offset in the file (asserted to be page aligned when the
// read offset is initialized).
LARGE_INTEGER FileOffset;
// Inpage support block for the read; NULL once it has been freed by
// MiCcCompletePrefetchIos.
PMMINPAGE_SUPPORT InPageSupport;
// MDL used for the actual transfer. Set to NULL if the MDL contents were
// copied into the inpage support block and the pool copy was freed.
PMDL IoMdl;
// MDL describing the locked pages that is handed back to the API caller.
PMDL ApiMdl;
// PFN entry of the dummy page used to bridge already-resident pages, or NULL.
PMMPFN DummyPagePfn;
// First and last subsections on which MiAddViewsForSectionWithPfn succeeded.
PSUBSECTION FirstReferencedSubsection;
PSUBSECTION LastReferencedSubsection;
// Length of the request in bytes.
SIZE_T LengthInBytes;
} MI_READ_INFO, *PMI_READ_INFO;
//
// Forward declarations for the routines used by
// MmPrefetchPagesIntoLockedMdl.
//

//
// Releases the subsection references, MDLs, inpage block and dummy page
// recorded in the read info. The ApiMdl is only freed when Status is a
// failure code.
//
VOID
MiCcReleasePrefetchResources (
IN PMI_READ_INFO MiReadInfo,
IN NTSTATUS Status
);
//
// Allocates the inpage block and both MDLs and records the prototype PTE
// address for every page of the request.
//
NTSTATUS
MiCcPrepareReadInfo (
IN PMI_READ_INFO MiReadInfo
);
//
// Locks down every page of the request and issues the read for the pages
// that are not resident. Returns STATUS_ISSUE_PAGING_IO when an I/O was
// issued.
//
NTSTATUS
MiCcPutPagesInTransition (
IN PMI_READ_INFO MiReadInfo
);
//
// Waits for the read issued by MiCcPutPagesInTransition and completes the
// page state.
//
NTSTATUS
MiCcCompletePrefetchIos (
PMI_READ_INFO MiReadInfo
);
//
// Called from MiCcPrepareReadInfo in checked builds when
// MI_CC_FORCE_PREFETCH is set in MiCcDebug.
//
VOID
MiRemoveUserPages (
VOID
);
//
// Called from MiCcReleasePrefetchResources to give back the dummy page.
//
VOID
MiPfFreeDummyPage (
IN PMMPFN DummyPagePfn
);
#ifdef ALLOC_PRAGMA
//
// Place the routines that run entirely at PASSIVE_LEVEL in the PAGE code
// section. MiCcPutPagesInTransition and MiCcCompletePrefetchIos are not
// listed; both of them run with the PFN lock held
// (NOTE(review): presumably that is why they are excluded - confirm).
//
#pragma alloc_text (PAGE, MmPrefetchPagesIntoLockedMdl)
#pragma alloc_text (PAGE, MiCcPrepareReadInfo)
#pragma alloc_text (PAGE, MiCcReleasePrefetchResources)
#endif
NTSTATUS
MmPrefetchPagesIntoLockedMdl (
    IN PFILE_OBJECT FileObject,
    IN PLARGE_INTEGER FileOffset,
    IN SIZE_T Length,
    OUT PMDL *MdlOut
    )

/*++

Routine Description:

    This routine fills an MDL with pages described by the file object's
    offset and length.

    This routine is for cache manager usage only.

Arguments:

    FileObject - Supplies a pointer to the file object for a file which was
                 opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
                 which CcInitializeCacheMap was called by the file system.

    FileOffset - Supplies the byte offset in the file for the desired data.

    Length - Supplies the length of the desired data in bytes.

    MdlOut - On successful return, receives a pointer to an Mdl describing
             the desired data. On any failure NULL is stored, because the
             Mdl (if one was allocated) has already been freed by
             MiCcReleasePrefetchResources.

Return Value:

    NTSTATUS.

Environment:

    Kernel mode. PASSIVE_LEVEL.

--*/

{
    MI_READ_INFO MiReadInfo;
    NTSTATUS status;
    KIRQL OldIrql;
    LOGICAL ApcNeeded;
    PETHREAD CurrentThread;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);

    //
    // Never leave the output parameter uninitialized or pointing at pool
    // that gets freed on a failure path below.
    //

    *MdlOut = NULL;

    RtlZeroMemory (&MiReadInfo, sizeof(MiReadInfo));

    MiReadInfo.FileObject = FileObject;
    MiReadInfo.FileOffset = *FileOffset;
    MiReadInfo.LengthInBytes = Length;

    //
    // Prepare for the impending read : allocate MDLs, inpage blocks,
    // reference count subsections, etc.
    //

    status = MiCcPrepareReadInfo (&MiReadInfo);

    if (!NT_SUCCESS (status)) {
        MiCcReleasePrefetchResources (&MiReadInfo, status);
        return status;
    }

    ASSERT (MiReadInfo.InPageSupport != NULL);

    //
    // APCs must be disabled once we put a page in transition.  Otherwise
    // a thread suspend will stop us from issuing the I/O - this will hang
    // any other threads that need the same page.
    //

    CurrentThread = PsGetCurrentThread();
    ApcNeeded = FALSE;

    ASSERT ((PKTHREAD)CurrentThread == KeGetCurrentThread ());
    KeEnterCriticalRegionThread ((PKTHREAD)CurrentThread);

    //
    // The nested fault count protects this thread from deadlocks where a
    // special kernel APC fires and references the same user page(s) we are
    // putting in transition.
    //

    KeRaiseIrql (APC_LEVEL, &OldIrql);
    ASSERT (CurrentThread->NestedFaultCount == 0);
    CurrentThread->NestedFaultCount += 1;
    KeLowerIrql (OldIrql);

    //
    // Allocate physical memory, lock down all the pages and issue any
    // I/O that may be needed.  When MiCcPutPagesInTransition returns
    // STATUS_SUCCESS or STATUS_ISSUE_PAGING_IO, it guarantees that the
    // ApiMdl contains reference-counted (locked-down) pages.
    //

    status = MiCcPutPagesInTransition (&MiReadInfo);

    if (NT_SUCCESS (status)) {

        //
        // No I/O was issued because all the pages were already resident and
        // have now been locked down.
        //

        ASSERT (MiReadInfo.ApiMdl != NULL);
    }
    else if (status == STATUS_ISSUE_PAGING_IO) {

        //
        // Wait for the I/O to complete.  Note APCs must remain disabled.
        //

        ASSERT (MiReadInfo.InPageSupport != NULL);

        status = MiCcCompletePrefetchIos (&MiReadInfo);
    }
    else {

        //
        // Some error occurred (like insufficient memory, etc) so fail
        // the request by falling through.
        //
    }

    //
    // Release acquired resources like pool, subsections, etc.  On failure
    // this also frees the ApiMdl.
    //

    MiCcReleasePrefetchResources (&MiReadInfo, status);

    //
    // Only now that the I/O have been completed (not just issued) can
    // APCs be re-enabled.  This prevents a user-issued suspend APC from
    // keeping a shared page in transition forever.
    //

    KeRaiseIrql (APC_LEVEL, &OldIrql);

    ASSERT (CurrentThread->NestedFaultCount == 1);

    CurrentThread->NestedFaultCount -= 1;

    if (CurrentThread->ApcNeeded == 1) {
        ApcNeeded = TRUE;
        CurrentThread->ApcNeeded = 0;
    }

    KeLowerIrql (OldIrql);

    KeLeaveCriticalRegionThread ((PKTHREAD)CurrentThread);

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
    ASSERT (CurrentThread->NestedFaultCount == 0);
    ASSERT (CurrentThread->ApcNeeded == 0);

    if (ApcNeeded == TRUE) {
        KeRaiseIrql (APC_LEVEL, &OldIrql);
        IoRetryIrpCompletions ();
        KeLowerIrql (OldIrql);
    }

    //
    // Hand the MDL to the caller only when it is still allocated, i.e. on
    // success.  This uses the same NT_SUCCESS test that
    // MiCcReleasePrefetchResources uses to decide whether to free it.
    //

    if (NT_SUCCESS (status)) {
        *MdlOut = MiReadInfo.ApiMdl;
    }

    return status;
}
VOID
MiCcReleasePrefetchResources (
    IN PMI_READ_INFO MiReadInfo,
    IN NTSTATUS Status
    )

/*++

Routine Description:

    This routine releases all resources consumed to handle a system cache
    logical offset based prefetch.

Arguments:

    MiReadInfo - Supplies a pointer to the read info whose resources are
                 to be released.  Every resource pointer that is released
                 here is also cleared in the structure.

    Status - Supplies the final status of the request.  The ApiMdl is freed
             only when this is a failure status, because on success it is
             returned to the API caller.

Return Value:

    None.

Environment:

    Kernel mode, PASSIVE_LEVEL.

--*/

{
    PSUBSECTION Subsection;
    PSUBSECTION NextSubsection;
    PSUBSECTION LastReferencedSubsection;

    //
    // Release all subsection prototype PTE references.
    //
    // MiCcPrepareReadInfo records the first and the last subsection on
    // which MiAddViewsForSectionWithPfn succeeded, so the range is
    // inclusive : the last subsection must be released as well (this also
    // covers the common case where first == last).  Both fields are NULL
    // when no subsection was ever referenced.
    //

    Subsection = MiReadInfo->FirstReferencedSubsection;
    LastReferencedSubsection = MiReadInfo->LastReferencedSubsection;

    while (Subsection != NULL) {

        //
        // Capture the successor before dropping our reference.
        //

        if (Subsection == LastReferencedSubsection) {
            NextSubsection = NULL;
        }
        else {
            NextSubsection = Subsection->NextSubsection;
        }

        MiRemoveViewsFromSectionWithPfn ((PMSUBSECTION) Subsection,
                                         Subsection->PtesInSubsection);

        Subsection = NextSubsection;
    }

    MiReadInfo->FirstReferencedSubsection = NULL;
    MiReadInfo->LastReferencedSubsection = NULL;

    if (MiReadInfo->IoMdl != NULL) {
        ExFreePool (MiReadInfo->IoMdl);
        MiReadInfo->IoMdl = NULL;
    }

    //
    // Note successful returns yield the ApiMdl so don't free it here.
    //

    if (!NT_SUCCESS (Status)) {
        if (MiReadInfo->ApiMdl != NULL) {
            ExFreePool (MiReadInfo->ApiMdl);

            //
            // Clear the field so the freed MDL cannot be handed out.
            //

            MiReadInfo->ApiMdl = NULL;
        }
    }

    if (MiReadInfo->InPageSupport != NULL) {

#if DBG
        MiReadInfo->InPageSupport->ListEntry.Next = NULL;
#endif

        MiFreeInPageSupportBlock (MiReadInfo->InPageSupport);
        MiReadInfo->InPageSupport = NULL;
    }

    //
    // Put DummyPage back on the free list.
    //

    if (MiReadInfo->DummyPagePfn != NULL) {
        MiPfFreeDummyPage (MiReadInfo->DummyPagePfn);
        MiReadInfo->DummyPagePfn = NULL;
    }
}
NTSTATUS
MiCcPrepareReadInfo (
    IN PMI_READ_INFO MiReadInfo
    )

/*++

Routine Description:

    This routine constructs MDLs that describe the pages in the argument
    read-list.  The caller will then issue the I/O on return.

    The inpage support block, the API MDL and the I/O MDL are allocated and
    stored in the read info as soon as each one exists, so on any failure
    return the caller can release whatever was obtained by calling
    MiCcReleasePrefetchResources.

    The prototype PTE address for each page of the request is stored in the
    page frame array of the I/O MDL (and a pointer to that array is
    temporarily stored in the inpage block's BasePte field) for
    MiCcPutPagesInTransition to consume.

Arguments:

    MiReadInfo - Supplies a pointer to the read-list.

Return Value:

    STATUS_SUCCESS - the read info is ready.

    STATUS_INSUFFICIENT_RESOURCES - an allocation failed.

    STATUS_INVALID_PARAMETER_1 - the section is not prefetchable.

    STATUS_INVALID_PARAMETER_2 - the starting file offset lies beyond the
                                 last subsection of the section.

    STATUS_INVALID_PARAMETER_3 - the requested length runs past the last
                                 subsection of the section.

    Any failure status returned by MiAddViewsForSectionWithPfn.

Environment:

    Kernel mode, PASSIVE_LEVEL.

--*/

{
    ULONG PteOffset;
    NTSTATUS Status;
    PMMPTE ProtoPte;
    PMMPTE LastProto;
    PMMPTE *ProtoPteArray;
    PCONTROL_AREA ControlArea;
    PSUBSECTION Subsection;
    PMMINPAGE_SUPPORT InPageSupport;
    PMDL Mdl;
    PMDL IoMdl;
    PMDL ApiMdl;
    ULONG i;
    PFN_NUMBER NumberOfPages;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);

    NumberOfPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES (MiReadInfo->FileOffset.LowPart, MiReadInfo->LengthInBytes);

    //
    // Translate the section object into the relevant control area.
    //

    ControlArea = (PCONTROL_AREA)MiReadInfo->FileObject->SectionObjectPointer->DataSectionObject;

    //
    // Initialize the internal Mi readlist.
    //

    MiReadInfo->ControlArea = ControlArea;

    //
    // Allocate and initialize an inpage support block for this run.
    //

    InPageSupport = MiGetInPageSupportBlock (FALSE, PREFETCH_PROCESS);

    if (InPageSupport == NULL) {
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    MiReadInfo->InPageSupport = InPageSupport;

    //
    // Allocate and initialize an MDL to return to our caller.  The actual
    // frame numbers are filled in when all the pages are reference counted.
    //

    ApiMdl = MmCreateMdl (NULL, NULL, NumberOfPages << PAGE_SHIFT);

    if (ApiMdl == NULL) {
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    ApiMdl->MdlFlags |= MDL_PAGES_LOCKED;

    MiReadInfo->ApiMdl = ApiMdl;

    //
    // Allocate and initialize an MDL to use for the actual transfer (if any).
    //

    IoMdl = MmCreateMdl (NULL, NULL, NumberOfPages << PAGE_SHIFT);

    if (IoMdl == NULL) {
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    MiReadInfo->IoMdl = IoMdl;
    Mdl = IoMdl;

    //
    // If the section is backed by a ROM, then there's no need to prefetch
    // anything as it would waste RAM.
    //

    if (ControlArea->u.Flags.Rom == 1) {
        ASSERT (XIPConfigured == TRUE);

#if 0
        //
        // Calculate the offset to read into the file.
        // offset = base + ((thispte - basepte) << PAGE_SHIFT)
        //
        StartingOffset.QuadPart = MiReadInfo.FileOffset;
        TempOffset = MiEndingOffset(Subsection);
        ASSERT (StartingOffset.QuadPart < TempOffset.QuadPart);
        PageFrameIndex = (PFN_NUMBER) (StartingOffset.QuadPart >> PAGE_SHIFT);
        PageFrameIndex += ((PLARGE_CONTROL_AREA)ControlArea)->StartingFrame;
        //
        // Increment the PFN reference count in the control area for
        // the subsection (the PFN lock is required to modify this field).
        //
        ControlArea->NumberOfPfnReferences += 1;
        Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
        ASSERT (Pfn1->u3.e1.Rom == 1);
        if (Pfn1->u3.e1.PageLocation != 0) {
            ASSERT (Pfn1->u3.e1.PageLocation == StandbyPageList);
            MiUnlinkPageFromList (Pfn1);
            //
            // Update the PFN database - the reference count must be
            // incremented as the share count is going to go from zero to 1.
            //
            ASSERT (Pfn1->u2.ShareCount == 0);
            ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
            Pfn1->u3.e2.ReferenceCount += 1;
            Pfn1->u2.ShareCount += 1;
            Pfn1->u3.e1.PageLocation = ActiveAndValid;
            ASSERT (Pfn1->PteAddress == PointerPte);
            ASSERT (Pfn1->u1.Event == NULL);
            //
            // Determine the page frame number of the page table page which
            // contains this PTE.
            //
            PteFramePointer = MiGetPteAddress (PointerPte);
            if (PteFramePointer->u.Hard.Valid == 0) {
#if (_MI_PAGING_LEVELS < 3)
                if (!NT_SUCCESS(MiCheckPdeForPagedPool (PointerPte))) {
#endif
                    KeBugCheckEx (MEMORY_MANAGEMENT,
                                  0x61940,
                                  (ULONG_PTR)PointerPte,
                                  (ULONG_PTR)PteFramePointer->u.Long,
                                  0);
#if (_MI_PAGING_LEVELS < 3)
                }
#endif
            }
            PteFramePage = MI_GET_PAGE_FRAME_FROM_PTE (PteFramePointer);
            ASSERT (Pfn1->PteFrame == PteFramePage);
            //
            // Increment the share count for the page table page containing
            // this PTE as the PTE is going to be made valid.
            //
            ASSERT (PteFramePage != 0);
            Pfn2 = MI_PFN_ELEMENT (PteFramePage);
            Pfn2->u2.ShareCount += 1;
        }
        else {
            ASSERT (Pfn1->u3.e1.InPageError == 0);
            ASSERT (Pfn1->u3.e1.PrototypePte == 1);
            ASSERT (Pfn1->u1.Event == NULL);
            MiInitializePfn (PageFrameIndex, PointerPte, 0);
        }
        //
        // Put the prototype PTE into the valid state.
        // LWFIX: don't make proto valid
        //
        MI_MAKE_VALID_PTE (TempPte,
                           PageFrameIndex,
                           PointerPte->u.Soft.Protection,
                           PointerPte);
        MI_WRITE_VALID_PTE (PointerPte, TempPte);
#endif

        // LWFIX: not success, but what ? and who frees pool
        return STATUS_SUCCESS;
    }

    //
    // Make sure the section is really prefetchable - physical and
    // pagefile-backed sections are not.
    //

    if ((ControlArea->u.Flags.PhysicalMemory) ||
        (ControlArea->u.Flags.Image == 1) ||
        (ControlArea->FilePointer == NULL)) {

        return STATUS_INVALID_PARAMETER_1;
    }

    //
    // Start the read at the proper file offset.
    //

    InPageSupport->ReadOffset = MiReadInfo->FileOffset;
    ASSERT (BYTE_OFFSET (InPageSupport->ReadOffset.LowPart) == 0);
    InPageSupport->FilePointer = MiReadInfo->FileObject;

    //
    // Stash a pointer to the start of the prototype PTE array (the values
    // in the array are not contiguous as they may cross subsections)
    // in the inpage block so we can walk it quickly later when the pages
    // are put into transition.
    //

    ProtoPteArray = (PMMPTE *)(Mdl + 1);

    InPageSupport->BasePte = (PMMPTE) ProtoPteArray;

    //
    // Data (but not image) reads use the whole page and the filesystems
    // zero fill any remainder beyond valid data length so we don't
    // bother to handle this here.  It is important to specify the
    // entire page where possible so the filesystem won't post this
    // which will hurt perf.  LWFIX: must use CcZero to make this true.
    //

    ASSERT (((ULONG_PTR)Mdl & (sizeof(QUAD) - 1)) == 0);
    InPageSupport->u1.e1.PrefetchMdlHighBits = ((ULONG_PTR)Mdl >> 3);

    //
    // Initialize the prototype PTE pointers.
    //

    ASSERT (ControlArea->u.Flags.GlobalOnlyPerSession == 0);

    if (ControlArea->u.Flags.Rom == 0) {
        Subsection = (PSUBSECTION)(ControlArea + 1);
    }
    else {
        Subsection = (PSUBSECTION)((PLARGE_CONTROL_AREA)ControlArea + 1);
    }

#if DBG
    if (MiCcDebug & MI_CC_FORCE_PREFETCH) {
        MiRemoveUserPages ();
    }
#endif

    //
    // Calculate the first prototype PTE address.
    //

    PteOffset = (ULONG)(MiReadInfo->FileOffset.QuadPart >> PAGE_SHIFT);

    //
    // Make sure the PTEs are not in the extended part of the segment.
    //

    while (PteOffset >= Subsection->PtesInSubsection) {
        PteOffset -= Subsection->PtesInSubsection;
        Subsection = Subsection->NextSubsection;

        if (Subsection == NULL) {

            //
            // The starting offset is beyond the last subsection, refuse
            // the request instead of walking off the end of the chain.
            //

            return STATUS_INVALID_PARAMETER_2;
        }
    }

    Status = MiAddViewsForSectionWithPfn ((PMSUBSECTION) Subsection,
                                          Subsection->PtesInSubsection);

    if (!NT_SUCCESS (Status)) {
        return Status;
    }

    //
    // Record the referenced range so the caller can release it.  Only
    // subsections whose view reference was successfully added are ever
    // recorded here.
    //

    MiReadInfo->FirstReferencedSubsection = Subsection;
    MiReadInfo->LastReferencedSubsection = Subsection;

    ProtoPte = &Subsection->SubsectionBase[PteOffset];
    LastProto = &Subsection->SubsectionBase[Subsection->PtesInSubsection];

    for (i = 0; i < NumberOfPages; i += 1) {

        //
        // Calculate which PTE maps the given logical block offset.
        //
        // Always look forwards (as an optimization) in the subsection chain.
        //
        // A quick check is made first to avoid recalculations and loops where
        // possible.
        //

        if (ProtoPte >= LastProto) {

            //
            // Handle extended subsections.  Increment the view count for
            // every subsection spanned by this request, creating prototype
            // PTEs if needed.
            //

            ASSERT (i != 0);

            Subsection = Subsection->NextSubsection;

            if (Subsection == NULL) {

                //
                // The request extends past the last subsection.  The
                // subsections referenced so far remain recorded in the
                // read info for the caller to release.
                //

                return STATUS_INVALID_PARAMETER_3;
            }

            Status = MiAddViewsForSectionWithPfn ((PMSUBSECTION) Subsection,
                                                  Subsection->PtesInSubsection);

            if (!NT_SUCCESS (Status)) {
                return Status;
            }

            MiReadInfo->LastReferencedSubsection = Subsection;

            ProtoPte = Subsection->SubsectionBase;
            LastProto = &Subsection->SubsectionBase[Subsection->PtesInSubsection];
        }

        *ProtoPteArray = ProtoPte;
        ProtoPteArray += 1;

        ProtoPte += 1;
    }

    return STATUS_SUCCESS;
}
NTSTATUS
MiCcPutPagesInTransition (
IN PMI_READ_INFO MiReadInfo
)
/*++
Routine Description:
This routine allocates physical memory for the specified read-list and
puts all the pages in transition (so collided faults from other threads
for these same pages remain coherent). I/O for any pages not already
resident are issued here. The caller must wait for their completion.
For every page of the request the API MDL receives the real page frame
(which is reference counted here), while the I/O MDL receives either the
newly allocated frame (page must be read) or the dummy page frame (page
was already valid or in transition so the data read for it is discarded).
Leading and trailing dummy pages are trimmed from the I/O MDL before the
read is issued.
Arguments:
MiReadInfo - Supplies a pointer to the read-list.
Return Value:
STATUS_SUCCESS - all the pages were already resident, reference counts
have been applied and no I/O needs to be waited for.
STATUS_ISSUE_PAGING_IO - the I/O has been issued and the caller must wait.
This is also returned when IoAsynchronousPageRead
fails : the failure status is then stored in the
inpage block's IoStatus and the event is set.
Various other failure status values indicate the operation failed.
Environment:
Kernel mode. PASSIVE_LEVEL.
--*/
{
NTSTATUS status;
PMMPTE LocalPrototypePte;
PVOID StartingVa;
PFN_NUMBER MdlPages;
KIRQL OldIrql;
MMPTE PteContents;
PFN_NUMBER PageFrameIndex;
PFN_NUMBER ResidentAvailableCharge;
PPFN_NUMBER IoPage;
PPFN_NUMBER ApiPage;
PPFN_NUMBER Page;
PPFN_NUMBER DestinationPage;
ULONG PageColor;
PMMPTE PointerPte;
PMMPTE *ProtoPteArray;
PMMPTE *EndProtoPteArray;
PFN_NUMBER DummyPage;
PMDL Mdl;
PMDL FreeMdl;
PMMPFN PfnProto;
PMMPFN Pfn1;
PMMPFN DummyPfn1;
ULONG i;
PFN_NUMBER DummyTrim;
ULONG NumberOfPagesNeedingIo;
MMPTE TempPte;
PMMPTE PointerPde;
PEPROCESS CurrentProcess;
PMMINPAGE_SUPPORT InPageSupport;
ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
MiReadInfo->DummyPagePfn = NULL;
FreeMdl = NULL;
CurrentProcess = PsGetCurrentProcess();
PfnProto = NULL;
PointerPde = NULL;
InPageSupport = MiReadInfo->InPageSupport;
//
// Recover the I/O MDL address that was encoded into the inpage block.
//
Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
ASSERT (Mdl == MiReadInfo->IoMdl);
//
// The page frame arrays immediately follow each MDL header.
//
IoPage = (PPFN_NUMBER)(Mdl + 1);
ApiPage = (PPFN_NUMBER)(MiReadInfo->ApiMdl + 1);
StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES (StartingVa,
Mdl->ByteCount);
if (MdlPages + 1 > MAXUSHORT) {
//
// The PFN ReferenceCount for the dummy page could wrap, refuse the
// request.
//
return STATUS_INSUFFICIENT_RESOURCES;
}
NumberOfPagesNeedingIo = 0;
//
// BasePte currently holds a pointer to the array of prototype PTE
// addresses built by MiCcPrepareReadInfo. That array starts at (Mdl + 1),
// i.e. it shares storage with the I/O MDL's page frame array addressed by
// IoPage. The loop below therefore reads each slot (*ProtoPteArray)
// before overwriting it through IoPage.
// NOTE(review): this relies on a prototype PTE pointer and a PFN_NUMBER
// being the same size - confirm.
//
ProtoPteArray = (PMMPTE *)InPageSupport->BasePte;
EndProtoPteArray = ProtoPteArray + MdlPages;
ASSERT (*ProtoPteArray != NULL);
LOCK_PFN (OldIrql);
//
// Ensure sufficient pages exist for the transfer plus the dummy page.
//
if ((MmAvailablePages <= MdlPages) ||
(MI_NONPAGABLE_MEMORY_AVAILABLE() <= (SPFN_NUMBER)MdlPages)) {
UNLOCK_PFN (OldIrql);
return STATUS_INSUFFICIENT_RESOURCES;
}
//
// Charge resident available immediately as the PFN lock may get released
// and reacquired below before all the pages have been locked down.
// Note the dummy page is immediately charged separately.
//
MmResidentAvailablePages -= MdlPages;
MM_BUMP_COUNTER(46, MdlPages);
ResidentAvailableCharge = MdlPages;
//
// Allocate a dummy page to map discarded pages that aren't skipped.
//
DummyPage = MiRemoveAnyPage (0);
Pfn1 = MI_PFN_ELEMENT (DummyPage);
ASSERT (Pfn1->u2.ShareCount == 0);
ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
MiInitializePfnForOtherProcess (DummyPage, MI_PF_DUMMY_PAGE_PTE, 0);
//
// Always bias the reference count by 1 and charge for this locked page
// up front so the myriad increments and decrements don't get slowed
// down with needless checking.
//
MI_ADD_LOCKED_PAGE_CHARGE(Pfn1, 42);
Pfn1->u3.e2.ReferenceCount += 1;
Pfn1->u3.e1.ReadInProgress = 1;
Pfn1->u3.e1.PrototypePte = 0;
//
// Record the dummy page in the read info so the caller's release routine
// can free it on every exit path from here on.
//
MiReadInfo->DummyPagePfn = Pfn1;
DummyPfn1 = Pfn1;
//
// Pre-add one dummy page reference for every page in the MDL. The
// references that turn out not to be needed (trimmed slots and slots
// that received a real page) are subtracted again further below.
//
DummyPfn1->u3.e2.ReferenceCount =
(USHORT)(DummyPfn1->u3.e2.ReferenceCount + MdlPages);
//
// Properly initialize the inpage support block fields we overloaded.
//
InPageSupport->BasePte = *ProtoPteArray;
//
// Build the proper InPageSupport and MDL to describe this run.
//
for (; ProtoPteArray < EndProtoPteArray; ProtoPteArray += 1, IoPage += 1, ApiPage += 1) {
//
// Fill the MDL entry for this RLE.
//
PointerPte = *ProtoPteArray;
ASSERT (PointerPte != NULL);
//
// The PointerPte better be inside a prototype PTE allocation
// so that subsequent page trims update the correct PTEs.
//
ASSERT (((PointerPte >= (PMMPTE)MmPagedPoolStart) &&
(PointerPte <= (PMMPTE)MmPagedPoolEnd)) ||
((PointerPte >= (PMMPTE)MmSpecialPoolStart) && (PointerPte <= (PMMPTE)MmSpecialPoolEnd)));
//
// Check the state of this prototype PTE now that the PFN lock is held.
// If the page is not resident, the PTE must be put in transition with
// read in progress before the PFN lock is released.
//
//
// Lock page containing prototype PTEs in memory by
// incrementing the reference count for the page.
// Unlock any page locked earlier containing prototype PTEs if
// the containing page is not the same for both.
//
if (PfnProto != NULL) {
if (PointerPde != MiGetPteAddress (PointerPte)) {
ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnProto, 43);
PfnProto = NULL;
}
}
if (PfnProto == NULL) {
ASSERT (!MI_IS_PHYSICAL_ADDRESS (PointerPte));
PointerPde = MiGetPteAddress (PointerPte);
if (PointerPde->u.Hard.Valid == 0) {
MiMakeSystemAddressValidPfn (PointerPte);
}
PfnProto = MI_PFN_ELEMENT (PointerPde->u.Hard.PageFrameNumber);
MI_ADD_LOCKED_PAGE_CHARGE(PfnProto, 44);
PfnProto->u3.e2.ReferenceCount += 1;
ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
}
//
// Every path below that drops and reacquires the PFN lock (or waits for
// a free page) comes back here to re-read the prototype PTE.
//
recheck:
PteContents = *PointerPte;
// LWFIX: are zero or dzero ptes possible here ?
ASSERT (PteContents.u.Long != ZeroKernelPte.u.Long);
if (PteContents.u.Hard.Valid == 1) {
//
// The page is already valid : reference and charge it, give the real
// frame to the API MDL and the dummy frame to the I/O MDL.
//
PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (&PteContents);
Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
ASSERT (Pfn1->u3.e1.PrototypePte == 1);
MI_ADD_LOCKED_PAGE_CHARGE(Pfn1, 45);
Pfn1->u3.e2.ReferenceCount += 1;
*ApiPage = PageFrameIndex;
*IoPage = DummyPage;
continue;
}
if ((PteContents.u.Soft.Prototype == 0) &&
(PteContents.u.Soft.Transition == 1)) {
//
// The page is in transition. If there is an inpage still in
// progress, wait for it to complete. Reference the PFN and
// then march on.
//
PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE (&PteContents);
Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
ASSERT (Pfn1->u3.e1.PrototypePte == 1);
if (Pfn1->u4.InPageError) {
//
// There was an in-page read error and there are other
// threads colliding for this page, delay to let the
// other threads complete and then retry.
//
UNLOCK_PFN (OldIrql);
KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond);
LOCK_PFN (OldIrql);
goto recheck;
}
//
// NOTE(review): the branch below is empty, so a transition page with a
// read still in progress is not waited for here despite the comment
// above; it is simply referenced like any other transition page.
//
if (Pfn1->u3.e1.ReadInProgress) {
// LWFIX - start with temp\aw.c
}
//
// PTE refers to a normal transition PTE.
//
ASSERT ((SPFN_NUMBER)MmAvailablePages >= 0);
if (MmAvailablePages == 0) {
//
// This can only happen if the system is utilizing a hardware
// compression cache. This ensures that only a safe amount
// of the compressed virtual cache is directly mapped so that
// if the hardware gets into trouble, we can bail it out.
//
UNLOCK_PFN (OldIrql);
KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond);
LOCK_PFN (OldIrql);
goto recheck;
}
//
// The PFN reference count will be 1 already here if the
// modified writer has begun a write of this page. Otherwise
// it's ordinarily 0.
//
MI_ADD_LOCKED_PAGE_CHARGE_FOR_MODIFIED_PAGE (Pfn1, 46);
Pfn1->u3.e2.ReferenceCount += 1;
*IoPage = DummyPage;
*ApiPage = PageFrameIndex;
continue;
}
//
// The page is neither valid nor in transition, so it must be read from
// the file into a newly allocated page.
//
ASSERT (PteContents.u.Soft.Prototype == 1);
if (MiEnsureAvailablePageOrWait (NULL, NULL)) {
//
// Had to wait so recheck all state.
//
goto recheck;
}
NumberOfPagesNeedingIo += 1;
//
// Allocate a physical page.
//
PageColor = MI_PAGE_COLOR_VA_PROCESS (
MiGetVirtualAddressMappedByPte (PointerPte),
&CurrentProcess->NextPageColor);
PageFrameIndex = MiRemoveAnyPage (PageColor);
Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
ASSERT (Pfn1->u2.ShareCount == 0);
ASSERT (PointerPte->u.Hard.Valid == 0);
//
// Initialize read-in-progress PFN.
//
MiInitializePfn (PageFrameIndex, PointerPte, 0);
//
// These pieces of MiInitializePfn initialization are overridden
// here as these pages are only going into prototype
// transition and not into any page tables.
//
MI_ADD_LOCKED_PAGE_CHARGE(Pfn1, 47);
Pfn1->u2.ShareCount -= 1;
Pfn1->u3.e1.PageLocation = ZeroedPageList;
//
// Initialize the I/O specific fields.
//
Pfn1->u1.Event = &InPageSupport->Event;
Pfn1->u3.e1.ReadInProgress = 1;
ASSERT (Pfn1->u4.InPageError == 0);
Pfn1->u3.e1.PrototypePte = 1;
//
// Increment the PFN reference count in the control area for
// the subsection.
//
MiReadInfo->ControlArea->NumberOfPfnReferences += 1;
//
// Put the prototype PTE into the transition state.
//
MI_MAKE_TRANSITION_PTE (TempPte,
PageFrameIndex,
PointerPte->u.Soft.Protection,
PointerPte);
MI_WRITE_INVALID_PTE (PointerPte, TempPte);
//
// Both MDLs describe the new page : it is the target of the read and it
// is also returned to the API caller.
//
*IoPage = PageFrameIndex;
*ApiPage = PageFrameIndex;
}
//
// Return the upfront resident available charge as the individual charges
// have all been made at this point.
//
MmResidentAvailablePages += ResidentAvailableCharge;
MM_BUMP_COUNTER(47, MdlPages);
//
// If all the pages were resident, dereference the dummy page references
// now and notify our caller that I/O is not necessary.
//
if (NumberOfPagesNeedingIo == 0) {
ASSERT (DummyPfn1->u3.e2.ReferenceCount > MdlPages);
DummyPfn1->u3.e2.ReferenceCount =
(USHORT)(DummyPfn1->u3.e2.ReferenceCount - MdlPages);
//
// Unlock page containing prototype PTEs.
//
if (PfnProto != NULL) {
ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnProto, 48);
}
UNLOCK_PFN (OldIrql);
return STATUS_SUCCESS;
}
//
// Carefully trim leading dummy pages.
//
// The scan stops one entry short of the end so at least one page always
// remains in the MDL.
//
Page = (PPFN_NUMBER)(Mdl + 1);
DummyTrim = 0;
for (i = 0; i < MdlPages - 1; i += 1) {
if (*Page == DummyPage) {
DummyTrim += 1;
Page += 1;
}
else {
break;
}
}
if (DummyTrim != 0) {
//
// Shrink the MDL, advance the file read offset past the trimmed pages
// and give back the dummy page references that were held for them.
//
Mdl->Size = (USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER)));
Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE);
ASSERT (Mdl->ByteCount != 0);
InPageSupport->ReadOffset.QuadPart += (DummyTrim * PAGE_SIZE);
DummyPfn1->u3.e2.ReferenceCount =
(USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim);
//
// Shuffle down the PFNs in the MDL.
// Recalculate BasePte to adjust for the shuffle.
//
Pfn1 = MI_PFN_ELEMENT (*Page);
ASSERT (Pfn1->PteAddress->u.Hard.Valid == 0);
ASSERT ((Pfn1->PteAddress->u.Soft.Prototype == 0) &&
(Pfn1->PteAddress->u.Soft.Transition == 1));
InPageSupport->BasePte = Pfn1->PteAddress;
DestinationPage = (PPFN_NUMBER)(Mdl + 1);
//
// i still equals DummyTrim here, so this copies the remaining
// (MdlPages - DummyTrim) entries to the front of the array.
//
do {
*DestinationPage = *Page;
DestinationPage += 1;
Page += 1;
i += 1;
} while (i < MdlPages);
MdlPages -= DummyTrim;
}
//
// Carefully trim trailing dummy pages.
//
ASSERT (MdlPages != 0);
Page = (PPFN_NUMBER)(Mdl + 1) + MdlPages - 1;
if (*Page == DummyPage) {
ASSERT (MdlPages >= 2);
//
// Trim the last page specially as it may be a partial page.
//
Mdl->Size -= sizeof(PFN_NUMBER);
if (BYTE_OFFSET(Mdl->ByteCount) != 0) {
Mdl->ByteCount &= ~(PAGE_SIZE - 1);
}
else {
Mdl->ByteCount -= PAGE_SIZE;
}
ASSERT (Mdl->ByteCount != 0);
DummyPfn1->u3.e2.ReferenceCount -= 1;
//
// Now trim any other trailing pages.
//
// The backwards scan never examines the first entry of the array.
//
Page -= 1;
DummyTrim = 0;
while (Page != ((PPFN_NUMBER)(Mdl + 1))) {
if (*Page != DummyPage) {
break;
}
DummyTrim += 1;
Page -= 1;
}
if (DummyTrim != 0) {
ASSERT (Mdl->Size > (USHORT)(DummyTrim * sizeof(PFN_NUMBER)));
Mdl->Size = (USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER)));
Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE);
DummyPfn1->u3.e2.ReferenceCount =
(USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim);
}
ASSERT (MdlPages > DummyTrim + 1);
MdlPages -= (DummyTrim + 1);
#if DBG
StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
ASSERT (MdlPages == ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
Mdl->ByteCount));
#endif
}
//
// If the MDL is not already embedded in the inpage block, see if its
// final size qualifies it - if so, embed it now.
//
if ((Mdl != &InPageSupport->Mdl) &&
(Mdl->ByteCount <= (MM_MAXIMUM_READ_CLUSTER_SIZE + 1) * PAGE_SIZE)){
#if DBG
RtlFillMemoryUlong (&InPageSupport->Page[0],
(MM_MAXIMUM_READ_CLUSTER_SIZE+1) * sizeof (PFN_NUMBER),
0xf1f1f1f1);
#endif
RtlCopyMemory (&InPageSupport->Mdl, Mdl, Mdl->Size);
//
// Remember the pool MDL so it can be freed once the PFN lock has been
// released, and re-encode the new MDL address in the inpage block.
//
FreeMdl = Mdl;
Mdl = &InPageSupport->Mdl;
ASSERT (((ULONG_PTR)Mdl & (sizeof(QUAD) - 1)) == 0);
InPageSupport->u1.e1.PrefetchMdlHighBits = ((ULONG_PTR)Mdl >> 3);
}
ASSERT (MdlPages != 0);
ASSERT (Mdl->Size - sizeof(MDL) == BYTES_TO_PAGES(Mdl->ByteCount) * sizeof(PFN_NUMBER));
//
// Give back the dummy page references that were pre-added for the slots
// which received a real (to be read) page instead.
//
DummyPfn1->u3.e2.ReferenceCount =
(USHORT)(DummyPfn1->u3.e2.ReferenceCount - NumberOfPagesNeedingIo);
MmInfoCounters.PageReadIoCount += 1;
MmInfoCounters.PageReadCount += NumberOfPagesNeedingIo;
//
// Unlock page containing prototype PTEs.
//
if (PfnProto != NULL) {
ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnProto, 49);
}
UNLOCK_PFN (OldIrql);
//
// The pool MDL was copied into the inpage block above. Clear the read
// info field before freeing so the release routine does not free it again.
//
if (FreeMdl != NULL) {
ASSERT (MiReadInfo->IoMdl == FreeMdl);
MiReadInfo->IoMdl = NULL;
ExFreePool (FreeMdl);
}
#if DBG
if (MiCcDebug & MI_CC_DELAY) {
//
// This delay provides a window to increase the chance of collided
// faults.
//
KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond);
}
#endif
//
// Finish initialization of the prefetch MDL (and the API MDL).
//
ASSERT ((Mdl->MdlFlags & MDL_MAPPED_TO_SYSTEM_VA) == 0);
Mdl->MdlFlags |= (MDL_PAGES_LOCKED | MDL_IO_PAGE_READ);
ASSERT (InPageSupport->u1.e1.Completed == 0);
ASSERT (InPageSupport->Thread == PsGetCurrentThread());
ASSERT64 (InPageSupport->UsedPageTableEntries == 0);
ASSERT (InPageSupport->WaitCount >= 1);
ASSERT (InPageSupport->u1.e1.PrefetchMdlHighBits != 0);
//
// The API caller expects an MDL containing all the locked pages so
// it can be used for a transfer.
//
// Note that an extra reference count is not taken on each page -
// rather when the Io MDL completes, its reference counts are not
// decremented (except for the dummy page). This combined with the
// reference count already taken on the resident pages keeps the
// accounting correct. Only if an error occurs will the Io MDL
// completion decrement the reference counts.
//
//
// Initialize the inpage support block Pfn field.
//
LocalPrototypePte = InPageSupport->BasePte;
ASSERT (LocalPrototypePte->u.Hard.Valid == 0);
ASSERT ((LocalPrototypePte->u.Soft.Prototype == 0) &&
(LocalPrototypePte->u.Soft.Transition == 1));
PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE(LocalPrototypePte);
Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
InPageSupport->Pfn = Pfn1;
//
// Issue the paging I/O.
//
ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
status = IoAsynchronousPageRead (InPageSupport->FilePointer,
Mdl,
&InPageSupport->ReadOffset,
&InPageSupport->Event,
&InPageSupport->IoStatus);
if (!NT_SUCCESS (status)) {
//
// Set the event as the I/O system doesn't set it on errors.
// This way our caller will automatically unroll the PFN reference
// counts, etc, when the MiWaitForInPageComplete returns this status.
//
InPageSupport->IoStatus.Status = status;
InPageSupport->IoStatus.Information = 0;
KeSetEvent (&InPageSupport->Event, 0, FALSE);
}
#if DBG
if (MiCcDebug & MI_CC_DELAY) {
//
// This delay provides a window to increase the chance of collided
// faults.
//
KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond);
}
#endif
//
// Returned even if the read could not be started - the failure has been
// recorded in the inpage block above for the waiter to pick up.
//
return STATUS_ISSUE_PAGING_IO;
}
NTSTATUS
MiCcCompletePrefetchIos (
    IN PMI_READ_INFO MiReadInfo
    )

/*++

Routine Description:

    Waits for the prefetch read described by the read info's inpage block
    to finish, clears the read-in-progress state of every page in the I/O
    MDL and, if the read failed, drops the page references that were taken
    for the transfer.  The inpage block is freed before returning and the
    read info's InPageSupport field is cleared.

Arguments:

    MiReadInfo - Pointer to the read-list.

Return Value:

    NTSTATUS of the I/O request.

Environment:

    Kernel mode, PASSIVE_LEVEL.

--*/

{
    PMMINPAGE_SUPPORT InPageBlock;
    PMDL IoMdl;
    PPFN_NUMBER FrameSlot;
    PMMPFN PagePfn;
    LONG BytesLeft;
    NTSTATUS IoStatus;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);

    InPageBlock = MiReadInfo->InPageSupport;

    ASSERT (InPageBlock->Pfn != 0);

    //
    // Decode the MDL address before waiting, exactly as the I/O was issued.
    //

    IoMdl = MI_EXTRACT_PREFETCH_MDL (InPageBlock);

    IoStatus = MiWaitForInPageComplete (InPageBlock->Pfn,
                                        InPageBlock->BasePte,
                                        NULL,
                                        InPageBlock->BasePte,
                                        InPageBlock,
                                        PREFETCH_PROCESS);

    //
    // MiWaitForInPageComplete RETURNS WITH THE PFN LOCK HELD!!!
    //

    //
    // First pass : mark every page of the transfer as no longer being read.
    //
    // The reference count for each page is deliberately left alone here so
    // MmPrefetchPagesIntoLockedMdl can return a locked page MDL; it is only
    // dropped by the failure pass below.
    //

    for (FrameSlot = (PPFN_NUMBER)(IoMdl + 1), BytesLeft = (LONG)IoMdl->ByteCount;
         BytesLeft > 0;
         FrameSlot += 1, BytesLeft -= PAGE_SIZE) {

        PagePfn = MI_PFN_ELEMENT (*FrameSlot);

#if DBG
        if (PagePfn->u4.InPageError) {

            //
            // A page flagged with an error implies the transfer as a whole
            // failed.  The prefetch dummy page is the one exception : it is
            // shared by concurrent transfers, so some other transaction may
            // have set its inpage error bit.
            //

            ASSERT ((IoStatus != STATUS_SUCCESS) ||
                    (PagePfn->PteAddress == MI_PF_DUMMY_PAGE_PTE));
        }
#endif

        if (PagePfn->u3.e1.ReadInProgress == 0) {
            continue;
        }

        ASSERT (PagePfn->u4.PteFrame != MI_MAGIC_AWE_PTEFRAME);

        PagePfn->u3.e1.ReadInProgress = 0;

        if (PagePfn->u4.InPageError == 0) {
            PagePfn->u1.Event = (PKEVENT)NULL;
        }
    }

    if (IoStatus != STATUS_SUCCESS) {

        //
        // Second pass, failure only : the read did not complete
        // successfully.  Drop the reference held on each page of the
        // transfer; a page that carries the inpage error flag and has no
        // references left is pulled off its list, has its PTE restored and
        // goes to the free list.
        //

        for (FrameSlot = (PPFN_NUMBER)(IoMdl + 1), BytesLeft = (LONG)IoMdl->ByteCount;
             BytesLeft > 0;
             FrameSlot += 1, BytesLeft -= PAGE_SIZE) {

            PagePfn = MI_PFN_ELEMENT (*FrameSlot);

            MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PagePfn, 50);

            if ((PagePfn->u4.InPageError == 1) &&
                (PagePfn->u3.e2.ReferenceCount == 0)) {

                ASSERT (PagePfn->u3.e1.PageLocation ==
                        StandbyPageList);

                MiUnlinkPageFromList (PagePfn);
                MiRestoreTransitionPte (*FrameSlot);
                MiInsertPageInFreeList (*FrameSlot);
            }
        }
    }

    //
    // At this point the relevant prototype PTEs should be transition or
    // valid and every page frame referenced.
    // LWFIX: add code to checked build to verify this.
    //

    ASSERT (InPageBlock->WaitCount >= 1);

    UNLOCK_PFN (PASSIVE_LEVEL);

#if DBG
    InPageBlock->ListEntry.Next = NULL;
#endif

    //
    // The inpage block is done with; clear the read info field so nobody
    // frees it a second time.
    //

    MiFreeInPageSupportBlock (InPageBlock);
    MiReadInfo->InPageSupport = NULL;

    return IoStatus;
}