607 lines
15 KiB
C
607 lines
15 KiB
C
/*++
|
||
|
||
Copyright (c) 2000 Microsoft Corporation
|
||
|
||
Module Name:
|
||
|
||
sratnuma.c
|
||
|
||
Abstract:
|
||
|
||
This module contain functions which support static NUMA configurations
|
||
as provided by the ACPI SRAT "Static Resource Affinity Table".
|
||
|
||
Author:
|
||
|
||
Peter L Johnston (peterj) 2-Jul-2000
|
||
|
||
Environment:
|
||
|
||
Kernel mode only.
|
||
|
||
Revision History:
|
||
|
||
--*/
|
||
|
||
#include "halp.h"
|
||
#include "acpitabl.h"
|
||
#include "xxacpi.h"
|
||
|
||
#if !defined(NT_UP)
|
||
|
||
#define ROUNDUP_TO_NEXT(base, size) \
|
||
((((ULONG_PTR)(base)) + (size)) & ~((size) - 1))
|
||
|
||
//
|
||
// The following routine is external but only used by NUMA support
|
||
// at the moment.
|
||
//
|
||
|
||
NTSTATUS
|
||
HalpGetApicIdByProcessorNumber(
|
||
IN UCHAR Processor,
|
||
IN OUT USHORT *ApicId
|
||
);
|
||
|
||
//
|
||
// Prototypes for alloc pragmas.
|
||
//
|
||
|
||
VOID
|
||
HalpNumaInitializeStaticConfiguration(
|
||
IN PLOADER_PARAMETER_BLOCK
|
||
);
|
||
|
||
|
||
#if defined(ALLOC_PRAGMA)
|
||
#pragma alloc_text(INIT,HalpNumaInitializeStaticConfiguration)
|
||
#endif
|
||
|
||
#define NEXT_ENTRY(base) (((PUCHAR)base) + (base)->Length)
|
||
|
||
#define HAL_MAX_NODES 8
|
||
|
||
#if defined(_WIN64)
|
||
|
||
#define HAL_MAX_PROCESSORS 64
|
||
|
||
#else
|
||
|
||
#define HAL_MAX_PROCESSORS 32
|
||
|
||
#endif
|
||
|
||
typedef struct _STATIC_NUMA_CONFIG {
|
||
USHORT ProcessorApicId[HAL_MAX_PROCESSORS];
|
||
UCHAR ProcessorProximity[HAL_MAX_PROCESSORS];
|
||
UCHAR ProximityId[HAL_MAX_NODES];
|
||
UCHAR NodeCount;
|
||
UCHAR ProcessorCount;
|
||
} HALPSRAT_STATIC_NUMA_CONFIG, *PHALPSRAT_STATIC_NUMA_CONFIG;
|
||
|
||
PHALPSRAT_STATIC_NUMA_CONFIG HalpNumaConfig;
|
||
PACPI_SRAT HalpAcpiSrat;
|
||
PULONG_PTR HalpNumaMemoryRanges;
|
||
PUCHAR HalpNumaMemoryNode;
|
||
ULONG HalpNumaLastRangeIndex;
|
||
|
||
ULONG
|
||
HalpNumaQueryPageToNode(
|
||
IN ULONG_PTR PhysicalPageNumber
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Search the memory range descriptors to determine the node
|
||
this page exists on.
|
||
|
||
Arguments:
|
||
|
||
PhysicalPageNumber Provides the page number.
|
||
|
||
Return Value:
|
||
|
||
Returns the node number for the page.
|
||
|
||
--*/
|
||
|
||
{
|
||
ULONG Index = HalpNumaLastRangeIndex;
|
||
|
||
//
|
||
// Starting in the same range as the last page returned,
|
||
// look for this page.
|
||
//
|
||
|
||
if (PhysicalPageNumber >= HalpNumaMemoryRanges[Index]) {
|
||
|
||
//
|
||
// Search upwards.
|
||
//
|
||
|
||
while (PhysicalPageNumber >= HalpNumaMemoryRanges[Index+1]) {
|
||
Index++;
|
||
}
|
||
|
||
} else {
|
||
|
||
//
|
||
// Search downwards.
|
||
//
|
||
|
||
do {
|
||
Index--;
|
||
} while (PhysicalPageNumber < HalpNumaMemoryRanges[Index]);
|
||
}
|
||
|
||
HalpNumaLastRangeIndex = Index;
|
||
return HalpNumaMemoryNode[Index];
|
||
}
|
||
|
||
NTSTATUS
|
||
HalpNumaQueryProcessorNode(
|
||
IN ULONG ProcessorNumber,
|
||
OUT PUSHORT Identifier,
|
||
OUT PUCHAR Node
|
||
)
|
||
{
|
||
NTSTATUS Status;
|
||
USHORT ApicId;
|
||
UCHAR Proximity;
|
||
UCHAR i, j;
|
||
|
||
//
|
||
// Get the APIC Id for this processor.
|
||
//
|
||
|
||
Status = HalpGetApicIdByProcessorNumber((UCHAR)ProcessorNumber, &ApicId);
|
||
if (!NT_SUCCESS(Status)) {
|
||
return Status;
|
||
}
|
||
|
||
//
|
||
// Return the APIC Id as the Identifier. This should probably
|
||
// be the ACPI Id but we don't have a way to get that yet.
|
||
//
|
||
|
||
*Identifier = ApicId;
|
||
|
||
//
|
||
// Find the node this processor belongs to. The node is the
|
||
// index into the array of Proximity Ids for the entry corresponding
|
||
// to the Proximity Id of this processor.
|
||
//
|
||
|
||
for (i = 0; i < HalpNumaConfig->ProcessorCount; i++) {
|
||
if (HalpNumaConfig->ProcessorApicId[i] == ApicId) {
|
||
Proximity = HalpNumaConfig->ProcessorProximity[i];
|
||
for (j = 0; j < HalpNumaConfig->NodeCount; j++) {
|
||
if (HalpNumaConfig->ProximityId[j] == Proximity) {
|
||
*Node = j;
|
||
return STATUS_SUCCESS;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
//
|
||
// Didn't find this processor in the known set of APIC IDs, this
|
||
// would indicate a mismatch between the BIOS MP tables and the
|
||
// SRAT, or, didn't find the proximity for this processor in the
|
||
// table of proximity IDs. This would be an internal error as
|
||
// this array is build from the set of proximity IDs in the SRAT.
|
||
//
|
||
|
||
return STATUS_NOT_FOUND;
|
||
}
|
||
|
||
VOID
|
||
HalpNumaInitializeStaticConfiguration(
|
||
IN PLOADER_PARAMETER_BLOCK LoaderBlock
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine reads the ACPI Static Resource Affinity Table to build
|
||
a picture of the system's NUMA configuration. This information is
|
||
saved in the HalpNumaConfig structure in a form which is optimal for
|
||
the OS's use.
|
||
|
||
Arguments:
|
||
|
||
LoaderBlock supplies a pointer to the system loader parameter block.
|
||
|
||
Return Value:
|
||
|
||
None.
|
||
|
||
--*/
|
||
|
||
{
|
||
ULONG NodeCount = 0;
|
||
ULONG MemoryDescriptorCount = 0;
|
||
UCHAR ProcessorCount = 0;
|
||
PACPI_SRAT_ENTRY SratEntry;
|
||
PACPI_SRAT_ENTRY SratEnd;
|
||
ULONG i, j;
|
||
BOOLEAN Swapped;
|
||
PHYSICAL_ADDRESS Base;
|
||
ULONG_PTR p;
|
||
PVOID Phys;
|
||
|
||
HalpAcpiSrat = HalpGetAcpiTablePhase0(LoaderBlock, ACPI_SRAT_SIGNATURE);
|
||
if (HalpAcpiSrat == NULL) {
|
||
return;
|
||
}
|
||
|
||
//
|
||
// The Static Resource Affinity Table (SRAT) exists.
|
||
//
|
||
// Scan it to determine the number of memory descriptors then
|
||
// allocate memory to contain the tables needed to hold the
|
||
// system's NUMA configuration.
|
||
//
|
||
|
||
SratEnd = (PACPI_SRAT_ENTRY)(((PUCHAR)HalpAcpiSrat) +
|
||
HalpAcpiSrat->Header.Length);
|
||
for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1);
|
||
SratEntry < SratEnd;
|
||
SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) {
|
||
switch (SratEntry->Type) {
|
||
case SratMemory:
|
||
MemoryDescriptorCount++;
|
||
break;
|
||
}
|
||
}
|
||
|
||
//
|
||
// HalpNumaConfig format:
|
||
//
|
||
// HalpNumaConfig->
|
||
// USHORT ProcessorApicId[HAL_MAX_PROCESSORS];
|
||
// UCHAR ProcessorProximity[HAL_MAX_PROCESSORS];
|
||
// UCHAR ProximityIds[HAL_MAX_NODES];
|
||
// UCHAR NodeCount;
|
||
// -pad- to 128 byte boundary
|
||
// HalpNumaMemoryNode->
|
||
// UCHAR MemoryRangeProximityId[NumberOfMemoryRanges];
|
||
// -pad to ULONG_PTR alignment-
|
||
// HalpNumaMemoryRanges->
|
||
// ULONG_PTR MemoryRangeBasePage[NumberOfMemoryRanges];
|
||
//
|
||
// This format has been selected to maximize cache hits while
|
||
// searching the ranges. Specifically, the size of the ranges
|
||
// array is kept to a minumum.
|
||
//
|
||
|
||
//
|
||
// Calculate number of pages required to hold the needed structures.
|
||
//
|
||
|
||
i = MemoryDescriptorCount * (sizeof(ULONG_PTR) + sizeof(UCHAR)) +
|
||
sizeof(HALPSRAT_STATIC_NUMA_CONFIG) + 2 * sizeof(ULONG) + 128;
|
||
i += PAGE_SIZE - 1;
|
||
i >>= PAGE_SHIFT;
|
||
|
||
Phys = (PVOID)HalpAllocPhysicalMemory(LoaderBlock,
|
||
MAXIMUM_PHYSICAL_ADDRESS,
|
||
i,
|
||
FALSE);
|
||
if (Phys == NULL) {
|
||
|
||
//
|
||
// Allocation failed, the system will not be able to run
|
||
// as a NUMA system,.... actually the system will probably
|
||
// not run far at all.
|
||
//
|
||
|
||
DbgPrint("HAL NUMA Initialization failed, could not allocate %d pages\n",
|
||
i);
|
||
|
||
HalpAcpiSrat = NULL;
|
||
return;
|
||
}
|
||
Base.QuadPart = (ULONG_PTR)Phys;
|
||
|
||
#if !defined(_IA64_)
|
||
|
||
HalpNumaConfig = HalpMapPhysicalMemory(Base, 1);
|
||
|
||
#else
|
||
|
||
HalpNumaConfig = HalpMapPhysicalMemory(Base, 1, MmCached);
|
||
|
||
#endif
|
||
|
||
if (HalpNumaConfig == NULL) {
|
||
|
||
//
|
||
// Couldn't map the allocation, give up.
|
||
//
|
||
|
||
HalpAcpiSrat = NULL;
|
||
return;
|
||
}
|
||
RtlZeroMemory(HalpNumaConfig, i * PAGE_SIZE);
|
||
|
||
//
|
||
// MemoryRangeProximity is an array of UCHARs starting at the next
|
||
// 128 byte boundary.
|
||
//
|
||
|
||
p = ROUNDUP_TO_NEXT((HalpNumaConfig + 1), 128);
|
||
HalpNumaMemoryNode = (PUCHAR)p;
|
||
|
||
//
|
||
// NumaMemoryRanges is an array of ULONG_PTRs starting at the next
|
||
// ULONG_PTR boundary.
|
||
//
|
||
|
||
p += (MemoryDescriptorCount + sizeof(ULONG_PTR)) & ~(sizeof(ULONG_PTR) - 1);
|
||
HalpNumaMemoryRanges = (PULONG_PTR)p;
|
||
|
||
//
|
||
// Rescan the SRAT entries filling in the HalpNumaConfig structure.
|
||
//
|
||
|
||
ProcessorCount = 0;
|
||
MemoryDescriptorCount = 0;
|
||
|
||
for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1);
|
||
SratEntry < SratEnd;
|
||
SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) {
|
||
|
||
//
|
||
// Does this entry belong to a proximity domain not previously
|
||
// seen? If so, we have a new node.
|
||
//
|
||
|
||
for (i = 0; i < HalpNumaConfig->NodeCount; i++) {
|
||
if (SratEntry->ProximityDomain == HalpNumaConfig->ProximityId[i]) {
|
||
break;
|
||
}
|
||
}
|
||
|
||
if (i == HalpNumaConfig->NodeCount) {
|
||
|
||
//
|
||
// This is an ID we haven't seen before. New Node.
|
||
//
|
||
|
||
if (HalpNumaConfig->NodeCount >= 8) {
|
||
|
||
//
|
||
// We support a maximum of 8 nodes, make this machine
|
||
// not NUMA. (Yes, we should free the config space
|
||
// we allocated,... but this is an error when it happens
|
||
// so I'm not worrying about it. peterj).
|
||
//
|
||
|
||
HalpAcpiSrat = NULL;
|
||
return;
|
||
}
|
||
HalpNumaConfig->ProximityId[i] = SratEntry->ProximityDomain;
|
||
HalpNumaConfig->NodeCount++;
|
||
}
|
||
|
||
switch (SratEntry->Type) {
|
||
case SratProcessorLocalAPIC:
|
||
|
||
if (SratEntry->ApicAffinity.Flags.Enabled == 0) {
|
||
|
||
//
|
||
// This processor is not enabled, skip it.
|
||
//
|
||
|
||
continue;
|
||
}
|
||
if (ProcessorCount == HAL_MAX_PROCESSORS) {
|
||
|
||
//
|
||
// Can't handle any more processors. Turn this
|
||
// into a non-numa machine.
|
||
//
|
||
|
||
HalpAcpiSrat = NULL;
|
||
return;
|
||
}
|
||
HalpNumaConfig->ProcessorApicId[ProcessorCount] =
|
||
|
||
#if defined(_IA64_)
|
||
|
||
SratEntry->ApicAffinity.ApicId << 8 |
|
||
(SratEntry->ApicAffinity.SApicEid);
|
||
|
||
#else
|
||
|
||
SratEntry->ApicAffinity.ApicId |
|
||
(SratEntry->ApicAffinity.SApicEid << 8);
|
||
|
||
#endif
|
||
|
||
HalpNumaConfig->ProcessorProximity[ProcessorCount] =
|
||
SratEntry->ProximityDomain;
|
||
ProcessorCount++;
|
||
break;
|
||
case SratMemory:
|
||
|
||
//
|
||
// Save the proximity and the base page for this range.
|
||
//
|
||
|
||
HalpNumaMemoryNode[MemoryDescriptorCount] =
|
||
SratEntry->ProximityDomain;
|
||
Base = SratEntry->MemoryAffinity.Base;
|
||
Base.QuadPart >>= PAGE_SHIFT;
|
||
ASSERT(Base.u.HighPart == 0);
|
||
|
||
// N.B. This does NOT work for 64 bit systems, those systems
|
||
// should keep both halves of the base address.
|
||
|
||
HalpNumaMemoryRanges[MemoryDescriptorCount] = Base.u.LowPart;
|
||
MemoryDescriptorCount++;
|
||
break;
|
||
}
|
||
}
|
||
|
||
HalpNumaConfig->ProcessorCount = ProcessorCount;
|
||
|
||
//
|
||
// Make sure processor 0 is always in 'logical' node 0. This
|
||
// is achieved by making sure the proximity Id for the first
|
||
// processor is always the first proximity Id in the table.
|
||
//
|
||
|
||
i = 0;
|
||
if (!NT_SUCCESS(HalpGetApicIdByProcessorNumber(0, (PUSHORT)&i))) {
|
||
|
||
//
|
||
// Couldn't find the ApicId of processor 0? Not quite
|
||
// sure what to do, I suspect the MP table's APIC IDs
|
||
// don't match the SRAT's.
|
||
//
|
||
|
||
DbgPrint("HAL No APIC ID for boot processor.\n");
|
||
}
|
||
|
||
for (j = 0; j < ProcessorCount; j++) {
|
||
if (HalpNumaConfig->ProcessorApicId[j] == (USHORT)i) {
|
||
UCHAR Proximity = HalpNumaConfig->ProcessorProximity[j];
|
||
for (i = 0; i < HalpNumaConfig->NodeCount; i++) {
|
||
if (HalpNumaConfig->ProximityId[i] == Proximity) {
|
||
HalpNumaConfig->ProximityId[i] =
|
||
HalpNumaConfig->ProximityId[0];
|
||
HalpNumaConfig->ProximityId[0] = Proximity;
|
||
break;
|
||
}
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
|
||
//
|
||
// Sort the memory ranges. There shouldn't be very many
|
||
// so a bubble sort should suffice.
|
||
//
|
||
|
||
j = MemoryDescriptorCount - 1;
|
||
do {
|
||
Swapped = FALSE;
|
||
for (i = 0; i < j; i++) {
|
||
|
||
ULONG_PTR t;
|
||
UCHAR td;
|
||
|
||
t = HalpNumaMemoryRanges[i];
|
||
if (t > HalpNumaMemoryRanges[i+1]) {
|
||
Swapped = TRUE;
|
||
HalpNumaMemoryRanges[i] = HalpNumaMemoryRanges[i+1];
|
||
HalpNumaMemoryRanges[i+1] = t;
|
||
|
||
//
|
||
// Keep the proximity domain in sync with the base.
|
||
//
|
||
|
||
td = HalpNumaMemoryNode[i];
|
||
HalpNumaMemoryNode[i] = HalpNumaMemoryNode[i+1];
|
||
HalpNumaMemoryNode[i+1] = td;
|
||
}
|
||
}
|
||
|
||
//
|
||
// The highest value is now at the top so cut it from the sort.
|
||
//
|
||
|
||
j--;
|
||
} while (Swapped == TRUE);
|
||
|
||
//
|
||
// When searching the memory descriptors to find out which domain
|
||
// a page is in, we don't care about gaps, we'll never be asked
|
||
// for a page in a gap, so, if two descriptors refer to the same
|
||
// domain, merge them in place.
|
||
//
|
||
|
||
j = 0;
|
||
for (i = 1; i < MemoryDescriptorCount; i++) {
|
||
if (HalpNumaMemoryNode[j] !=
|
||
HalpNumaMemoryNode[i]) {
|
||
j++;
|
||
HalpNumaMemoryNode[j] = HalpNumaMemoryNode[i];
|
||
HalpNumaMemoryRanges[j] = HalpNumaMemoryRanges[i];
|
||
continue;
|
||
}
|
||
}
|
||
MemoryDescriptorCount = j + 1;
|
||
|
||
//
|
||
// The highest page number on an x86 (32 bit) system with PAE
|
||
// making physical addresses 36 bits, is (1 << (36-12)) - 1, ie
|
||
// 0x00ffffff. Terminate the table with 0xffffffff which won't
|
||
// actually correspond to any domain but will always be higher
|
||
// than any valid value.
|
||
//
|
||
|
||
HalpNumaMemoryRanges[MemoryDescriptorCount] = 0xffffffff;
|
||
|
||
//
|
||
// And the base of the lowest range should be 0 even if there
|
||
// are no pages that low.
|
||
//
|
||
|
||
HalpNumaMemoryRanges[0] = 0;
|
||
|
||
//
|
||
// Convert the proximity IDs in the memory node array to
|
||
// node number. Node number is the index of the matching
|
||
// entry in proximity ID array.
|
||
//
|
||
|
||
for (i= 0; i < MemoryDescriptorCount; i++) {
|
||
for (j = 0; j < HalpNumaConfig->NodeCount; j++) {
|
||
if (HalpNumaMemoryNode[i] == HalpNumaConfig->ProximityId[j]) {
|
||
HalpNumaMemoryNode[i] = (UCHAR)j;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
#endif
|
||
|
||
NTSTATUS
|
||
HalpGetAcpiStaticNumaTopology(
|
||
HAL_NUMA_TOPOLOGY_INTERFACE * NumaInfo
|
||
)
|
||
{
|
||
#if !defined(NT_UP)
|
||
|
||
//
|
||
// This routine is never called unless this ACPI HAL found
|
||
// a Static Resource Affinity Table (SRAT). But just in case ...
|
||
//
|
||
|
||
if (HalpAcpiSrat == NULL) {
|
||
return STATUS_INVALID_LEVEL;
|
||
}
|
||
|
||
//
|
||
// Fill in the data structure for the kernel.
|
||
//
|
||
|
||
NumaInfo->NumberOfNodes = HalpNumaConfig->NodeCount;
|
||
NumaInfo->QueryProcessorNode = HalpNumaQueryProcessorNode;
|
||
NumaInfo->PageToNode = HalpNumaQueryPageToNode;
|
||
return STATUS_SUCCESS;
|
||
|
||
#else
|
||
|
||
return STATUS_INVALID_LEVEL;
|
||
|
||
#endif
|
||
}
|
||
|