565 lines
18 KiB
C
565 lines
18 KiB
C
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
|
|
//
|
|
// Copyright (c) 1998-2000 Microsoft Corporation
|
|
//
|
|
// This file is part of the Microsoft Research IPv6 Network Protocol Stack.
|
|
// You should have received a copy of the Microsoft End-User License Agreement
|
|
// for this software along with this release; see the file "license.txt".
|
|
// If not, please see http://www.research.microsoft.com/msripv6/license.htm,
|
|
// or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
|
|
//
|
|
// Abstract:
|
|
//
|
|
// Routing code external definitions for Internet Protocol Version 6.
|
|
//
|
|
|
|
|
|
#ifndef ROUTE_INCLUDED
|
|
#define ROUTE_INCLUDED 1
|
|
|
|
#ifndef IPINFO_INCLUDED
|
|
# include <ipinfo.h>
|
|
#endif
|
|
|
|
// Forward typedefs for structures whose bodies appear later in this header.
typedef struct BindingCacheEntry BindingCacheEntry;
typedef struct RouteTableEntry RouteTableEntry;
typedef struct SitePrefixEntry SitePrefixEntry;

// Routing module entry points; the implementations live outside this header.
extern void
InitRouting(void);

extern void
UnloadRouting(void);
|
|
|
|
//
|
|
// Structure of a route cache entry.
|
|
//
|
|
// A route cache entry (RCE) primarily caches two computations:
|
|
// next-hop determination and source address selection.
|
|
// An RCE also caches other information related to the destination,
|
|
// like path MTU.
|
|
//
|
|
// An RCE can also be created as a result of receiving a Redirect
|
|
// ICMP message.
|
|
//
|
|
// There is at most one RCE per destination address / interface pair.
|
|
// Our route cache corresponds to the destination cache
|
|
// mentioned in RFC 1970's conceptual data structures,
|
|
// with the addition of support for multi-homed nodes.
|
|
//
|
|
// The primary lookup key for RCEs is the destination address.
|
|
// The current implementation just searches a list of all RCEs,
|
|
// but a hash table or tree data structure would be preferable.
|
|
//
|
|
// Some nodes (like busy servers) might have many thousands of RCEs
|
|
// but only tens of NCEs, because most destinations are reached
|
|
// through only a few neighbor routers. Some nodes (like busy routers)
|
|
// will have relatively few RCEs and hundreds of NCEs, because
|
|
// forwarding does not use an RCE.
|
|
//
|
|
// The three major components of an RCE are the destination address,
|
|
// NTE (indicates both the interface, and the best source address
|
|
// on that interface to use for this destination), and NCE
|
|
// (neighbor to which to send packets for this destination).
|
|
//
|
|
// Once an RCE is created, these three components are read-only
|
|
// and anyone who holds a reference for the RCE can rely on
|
|
// them not changing. The RCE holds references for the NTE and NCE.
|
|
// This allows code that holds an RCE to access the important
|
|
// fields without acquiring any locks. Fields like the path MTU
|
|
// can also be safely read without a lock.
|
|
//
|
|
// When an RCE becomes invalid, it is removed from the route cache
|
|
// but it is not deallocated until it has zero references.
|
|
// The route cache itself holds one reference on RCEs in the cache.
|
|
//
|
|
// Because an RCE caches the result of two computations, RCEs can
|
|
// become invalid (stale) for two reasons: the preferred source
|
|
// address should be recomputed, or the next-hop neighbor should be
|
|
// recomputed.
|
|
//
|
|
// Source addresses need to be recomputed or checked when the NTEs
|
|
// on the RCE's interface change state - for example a new address
|
|
// is created, a preferred address becomes deprecated, etc.
|
|
// In practice, these should be relatively infrequent situations.
|
|
//
|
|
// Next-hop determination needs to be redone in several situations:
|
|
// a neighbor is not reachable, a neighbor stops being a router,
|
|
// a route in the routing table is removed or added, etc.
|
|
// Again, these should be relatively infrequent situations.
|
|
//
|
|
// To avoid undue time & memory overheads (for example maintaining
|
|
// a linked list of all RCEs that point to an NCE and a linked list
|
|
// of all RCEs on a given interface, so that the right RCEs can
|
|
// be immediately found when something changes), we use a "lazy" approach
|
|
// based on a validation counter.
|
|
//
|
|
// There is a single global validation counter and when any state
|
|
// changes that might potentially invalidate an RCE, this counter
|
|
// is incremented. Each RCE has a snapshot of the counter that
|
|
// can be quickly checked to validate the RCE.
|
|
//
|
|
// If the RCE is invalid, then its contents (best source address,
|
|
// next hop neighbor) are recomputed. If they are still correct,
|
|
// then the RCE's validation counter snapshot is updated.
|
|
// Otherwise the RCE's contents are updated (if nobody is using the RCE)
|
|
// or a new RCE is created and the invalid RCE is removed from the cache.
|
|
// Because the important fields in an RCE are read-only,
|
|
// an RCE can only be updated in-place if it has no external references.
|
|
//
|
|
// For efficiency, some code may cache an RCE reference for a "long"
|
|
// time, for example in a connection control block. Before using
|
|
// the cached RCE, such code should check the invalidation counter
|
|
// to ensure that the RCE is still valid. The ValidateRCE function
|
|
// performs this check.
|
|
//
|
|
// Some RCEs are "constrained" (RCE_FLAG_CONSTRAINED). This means
|
|
// that they can only be found in RouteToDestination if the caller
|
|
// explicitly specifies an outgoing interface (RCE_FLAG_CONSTRAINED_IF)
|
|
// or scopeid (RCE_FLAG_CONSTRAINED_SCOPEID). Consider
|
|
// a multi-homed node which can reach a destination via two interfaces,
|
|
// one of which is preferred (has a longer-matching-prefix route)
|
|
// over the other. An RCE for reaching the destination via the non-preferred
|
|
// interface will be marked as "constrained", to prevent its use
|
|
// when RouteToDestination is called without a constraining NTEorIF.
|
|
//
|
|
// Because specifying an interface implicitly specifies a scopeid,
|
|
// RCEs with RCE_FLAG_CONSTRAINED_IF also have RCE_FLAG_CONSTRAINED_SCOPEID.
|
|
//
|
|
// For a given destination address, all or all but one RCE for that
|
|
// destination should be "constrained". Or put another way, at most one RCE
|
|
// should not be "constrained". Or put another way, a destination address
|
|
// sans scopeid can only have one preferred outgoing interface.
|
|
// For a destination address / scopeid pair, all or all but one RCE
|
|
// for that pair should be "interface constrained".
|
|
//
|
|
// The BCE field is non-NULL if this is a home address.
|
|
// It does not hold a reference (Binding Cache Entries are not refcounted)
|
|
// and it can only be non-NULL if the RCE is in the cache.
|
|
// Access to the BCE field requires the route cache lock.
|
|
//
|
|
struct RouteCacheEntry {
|
|
RouteCacheEntry *Next; // Next RCE in cache list.
|
|
RouteCacheEntry *Prev; // Previous entry in cache list.
|
|
long RefCnt;
|
|
ushort Flags; // Peculiarities about this entry.
|
|
ushort Type; // See below.
|
|
ulong Valid; // Validation counter value.
|
|
IPv6Addr Destination; // Where this route is to.
|
|
struct NetTableEntry *NTE; // Preferred source address/interface.
|
|
NeighborCacheEntry *NCE; // First-hop neighbor.
|
|
uint LastError; // Time of last ICMP error (IPv6 ticks).
|
|
uint PathMTU; // MTU of path to destination.
|
|
uint PMTULastSet; // Time of last PMTU reduction.
|
|
BindingCacheEntry *BCE; // If this is a home address.
|
|
};
|
|
|
|
//
|
|
// These flag bits indicate whether the IF or ScopeId arguments
|
|
// to FindOrCreateRoute affected the choice of RCE.
|
|
// NB: FindOrCreateRoute assumes that these are the only flag bits.
|
|
//
|
|
// Values for RouteCacheEntry.Flags.
#define RCE_FLAG_CONSTRAINED_IF         0x1
#define RCE_FLAG_CONSTRAINED_SCOPEID    0x2
// Both constraint bits together (numerically 0x3).
#define RCE_FLAG_CONSTRAINED \
    (RCE_FLAG_CONSTRAINED_IF | RCE_FLAG_CONSTRAINED_SCOPEID)

// Values for RouteCacheEntry.Type.
#define RCE_TYPE_COMPUTED               1
#define RCE_TYPE_REDIRECT               2
|
|
|
|
__inline void
|
|
AddRefRCE(RouteCacheEntry *RCE)
|
|
{
|
|
InterlockedIncrement(&RCE->RefCnt);
|
|
}
|
|
|
|
// The global validation counter; the storage is defined elsewhere.
extern ulong RouteCacheValidationCounter;

//
// Advance the global validation counter with an interlocked increment.
// The result of the increment is deliberately not used.
//
__inline void
InvalidateRouteCache(void)
{
    (void) InterlockedIncrement(&RouteCacheValidationCounter);
}
|
|
|
|
__inline void
|
|
InvalidateRCE(RouteCacheEntry *RCE)
|
|
{
|
|
InterlockedDecrement(&RCE->Valid);
|
|
}
|
|
|
|
//
|
|
// Structure of an entry in the route table.
|
|
//
|
|
// SitePrefixLength and PreferredLifetime
|
|
// are only used when generating a Prefix Information Option
|
|
// based on the route.
|
|
//
|
|
// If the route is published, then it does not disappear
|
|
// even when the lifetime goes to zero. It is still present
|
|
// for use in generating Router Advertisements.
|
|
// But it doesn't get used for routing.
|
|
// Similarly, system routes (RTE_TYPE_SYSTEM) are kept
|
|
// in the route table even when their lifetime is zero.
|
|
// This allows a loopback route to be allocated for an NTE/AAE
|
|
// up front, but not be enabled until the address is valid.
|
|
//
|
|
struct RouteTableEntry {
|
|
struct RouteTableEntry *Next; // Next entry on prefix list.
|
|
Interface *IF; // Relevant interface.
|
|
NeighborCacheEntry *NCE; // Next-hop neighbor (may be NULL).
|
|
IPv6Addr Prefix; // Prefix (note not all bits are valid!).
|
|
uint PrefixLength; // Number of bits in above to use as prefix.
|
|
uint SitePrefixLength; // If non-zero, indicates a site subprefix.
|
|
uint ValidLifetime; // In ticks.
|
|
uint PreferredLifetime; // In ticks.
|
|
uint Preference; // Smaller is better.
|
|
ushort Flags;
|
|
ushort Type;
|
|
};
|
|
|
|
//
|
|
// The Type field indicates where the route came from.
|
|
// These are RFC 2465 ipv6RouteProtocol values.
|
|
// Routing protocols are free to define new values.
|
|
// Only these three values are built-in.
|
|
// ntddip6.h also defines these values, as well as others.
|
|
//
|
|
// Built-in values for RouteTableEntry.Type.
#define RTE_TYPE_SYSTEM     2
#define RTE_TYPE_MANUAL     3
#define RTE_TYPE_AUTOCONF   4
|
|
|
|
__inline int
|
|
IsValidRouteTableType(uint Type)
|
|
{
|
|
return Type < (1 << 16);
|
|
}
|
|
|
|
//
|
|
// If the NCE is NULL, then the RTE specifies an on-link prefix.
|
|
// Otherwise the RTE specifies a route to the neighbor.
|
|
// As you would expect, generally the neighbor is on the interface.
|
|
// Loopback routes are an exception.
|
|
//
|
|
// The PUBLISH bit indicates that the RTE can be visible
|
|
// to RouterAdvertSend. That is, it is a "public" route.
|
|
// The IMMORTAL bit indicates that the RTE's lifetime
|
|
// does not age or countdown. It is useful in PUBLISHed RTEs,
|
|
// where the RTE's lifetime affects the lifetime in RAs.
|
|
// In non-PUBLISHed RTEs it is equivalent to an infinite lifetime.
|
|
//
|
|
// Bits for RouteTableEntry.Flags.
#define RTE_FLAG_PUBLISH    0x0001  // Route is used when building RAs.
#define RTE_FLAG_IMMORTAL   0x0002  // Lifetime is not counted down.
|
|
|
|
//
|
|
// These values are also defined in ntddip6.h.
|
|
// Zero preference is reserved for administrative configuration.
|
|
// Smaller is more preferred than larger.
|
|
// We call these numbers preferences instead of metrics
|
|
// in an attempt to prevent confusion with the metrics
|
|
// employed by routing protocols. Routing protocol metrics
|
|
// need to be mapped into our routing table preferences.
|
|
// The largest preference value is 2^31-1, so that
|
|
// we can add a route preference and an interface preference
|
|
// without overflow.
|
|
//
|
|
// Route preference levels, listed from least to most preferred.
#define ROUTE_PREF_LOW          0x1000  // 16*16*16
#define ROUTE_PREF_MEDIUM       0x100   // 16*16
#define ROUTE_PREF_HIGH         16
#define ROUTE_PREF_ON_LINK      8
#define ROUTE_PREF_LOOPBACK     4
#define ROUTE_PREF_HIGHEST      0
|
|
|
|
//
|
|
// Extract a route preference value
|
|
// from the Flags field in a Router Advertisement.
|
|
//
|
|
__inline int
|
|
ExtractRoutePreference(uchar Flags)
|
|
{
|
|
switch (Flags & 0x18) {
|
|
case 0x08:
|
|
return ROUTE_PREF_HIGH;
|
|
case 0x00:
|
|
return ROUTE_PREF_MEDIUM;
|
|
case 0x18:
|
|
return ROUTE_PREF_LOW;
|
|
default:
|
|
return 0; // Invalid.
|
|
}
|
|
}
|
|
|
|
//
|
|
// Encode a route preference value
|
|
// for use in a Flags field in a Router Advertisement.
|
|
//
|
|
__inline uchar
|
|
EncodeRoutePreference(uint Preference)
|
|
{
|
|
if (Preference <= ROUTE_PREF_HIGH)
|
|
return 0x08;
|
|
else if (Preference <= ROUTE_PREF_MEDIUM)
|
|
return 0x00;
|
|
else
|
|
return 0x18;
|
|
}
|
|
|
|
__inline int
|
|
IsValidPreference(uint Preference)
|
|
{
|
|
return Preference < (1 << 31);
|
|
}
|
|
|
|
__inline int
|
|
IsOnLinkRTE(RouteTableEntry *RTE)
|
|
{
|
|
return (RTE->NCE == NULL);
|
|
}
|
|
|
|
|
|
//
|
|
// Binding cache structure. Holds references to care-of RCE's.
|
|
//
|
|
struct BindingCacheEntry {
|
|
struct BindingCacheEntry *Next;
|
|
struct BindingCacheEntry *Prev;
|
|
RouteCacheEntry *CareOfRCE;
|
|
IPv6Addr HomeAddr;
|
|
uint BindingLifetime; // Remaining lifetime (IPv6 ticks).
|
|
ushort BindingSeqNumber;
|
|
};
|
|
|
|
//
|
|
// Site prefix entry.
|
|
// Used for filtering site-local addresses returned by DNS.
|
|
//
|
|
struct SitePrefixEntry {
|
|
struct SitePrefixEntry *Next;
|
|
Interface *IF;
|
|
uint ValidLifetime; // In ticks.
|
|
uint SitePrefixLength;
|
|
IPv6Addr Prefix;
|
|
};
|
|
|
|
//
|
|
// Global data structures.
|
|
//
|
|
|
|
//
|
|
// RouteCacheLock protects the route cache and the binding cache.
|
|
// RouteTableLock protects the route table and the site-prefix table.
|
|
//
|
|
// Lock acquisition order is:
|
|
// RouteCacheLock before interface locks
|
|
// interface locks before RouteTableLock
|
|
// IoCancelSpinLock before RouteTableLock
|
|
// RouteTableLock before neighbor cache locks
|
|
//
|
|
extern KSPIN_LOCK RouteCacheLock;   // Guards the route cache and binding cache.
extern KSPIN_LOCK RouteTableLock;   // Guards the route table and site-prefix table.
|
|
|
|
//
|
|
// The Route Cache contains RCEs. RCEs with reference count of one
|
|
// can still be cached, but they may also be reclaimed.
|
|
// (The lone reference is from the cache itself.)
|
|
//
|
|
// The current implementation is a simple circular linked-list of RCEs.
|
|
//
|
|
extern struct RouteCache {
|
|
uint Limit;
|
|
uint Count;
|
|
RouteCacheEntry *First;
|
|
RouteCacheEntry *Last;
|
|
} RouteCache;
|
|
#define SentinelRCE ((RouteCacheEntry *)&RouteCache.First)
|
|
|
|
extern struct RouteTable {
|
|
RouteTableEntry *First;
|
|
RouteTableEntry **Last;
|
|
} RouteTable;
|
|
|
|
extern struct BindingCache {
|
|
uint Limit;
|
|
uint Count;
|
|
BindingCacheEntry *First;
|
|
BindingCacheEntry *Last;
|
|
} BindingCache;
|
|
#define SentinelBCE ((BindingCacheEntry *)&BindingCache.First)
|
|
|
|
extern SitePrefixEntry *SitePrefixTable;
|
|
|
|
//
|
|
// Set to TRUE when the routing table changes
|
|
// (for example adding/removing/changing published routes)
|
|
// so that it's a good idea to send Router Advertisements
|
|
// very promptly.
|
|
//
|
|
extern int ForceRouterAdvertisements;
|
|
|
|
//
|
|
// Contains a queue of IRPs that represent
|
|
// route notification requests.
|
|
//
|
|
extern LIST_ENTRY RouteNotifyQueue;
|
|
|
|
//
|
|
// Exported function declarations.
|
|
//
|
|
|
|
int
|
|
IsLoopbackRCE(RouteCacheEntry *RCE);
|
|
|
|
int
|
|
IsDisconnectedAndNotLoopbackRCE(RouteCacheEntry *RCE);
|
|
|
|
extern IPAddr
|
|
GetV4Destination(RouteCacheEntry *RCE);
|
|
|
|
uint
|
|
GetPathMTUFromRCE(RouteCacheEntry *RCE);
|
|
|
|
uint
|
|
GetEffectivePathMTUFromRCE(RouteCacheEntry *RCE);
|
|
|
|
void
|
|
ConfirmForwardReachability(RouteCacheEntry *RCE);
|
|
|
|
void
|
|
ForwardReachabilityInDoubt(RouteCacheEntry *RCE);
|
|
|
|
uint
|
|
GetInitialRTTFromRCE(RouteCacheEntry *RCE);
|
|
|
|
|
|
extern void
|
|
ReleaseRCE(RouteCacheEntry *RCE);
|
|
|
|
extern RouteCacheEntry *
|
|
ValidateRCE(RouteCacheEntry *RCE);
|
|
|
|
#define RTD_FLAG_STRICT 0 // Must use specified IF.
|
|
#define RTD_FLAG_NORMAL 1 // Must use specified IF unless it forwards.
|
|
#define RTD_FLAG_LOOSE 2 // Only use IF to determine/check ScopeId.
|
|
|
|
extern IP_STATUS
|
|
RouteToDestination(const IPv6Addr *Destination, uint ScopeId,
|
|
NetTableEntryOrInterface *NTEorIF, uint Flags,
|
|
RouteCacheEntry **RCE);
|
|
|
|
extern void
|
|
FlushRouteCache(Interface *IF, const IPv6Addr *Addr);
|
|
|
|
extern NetTableEntry *
|
|
FindNetworkWithAddress(const IPv6Addr *Source, uint ScopeId);
|
|
|
|
extern NTSTATUS
|
|
RouteTableUpdate(PFILE_OBJECT FileObject,
|
|
Interface *IF, NeighborCacheEntry *NCE,
|
|
const IPv6Addr *Prefix, uint PrefixLength,
|
|
uint SitePrefixLength,
|
|
uint ValidLifetime, uint PreferredLifetime,
|
|
uint Pref, uint Type, int Publish, int Immortal);
|
|
|
|
extern void
|
|
SitePrefixUpdate(Interface *IF,
|
|
const IPv6Addr *Prefix, uint SitePrefixLength,
|
|
uint ValidLifetime);
|
|
|
|
extern uint
|
|
SitePrefixMatch(const IPv6Addr *Destination);
|
|
|
|
extern void
|
|
RouteTableRemove(Interface *IF);
|
|
|
|
extern void
|
|
RouteTableResetAutoConfig(Interface *IF, uint MaxLifetime);
|
|
|
|
extern void
|
|
RouteTableReset(void);
|
|
|
|
extern IP_STATUS
|
|
FindOrCreateRoute(const IPv6Addr *Dest, uint ScopeId,
|
|
Interface *IF, RouteCacheEntry **ReturnRCE);
|
|
|
|
extern IP_STATUS
|
|
FindNextHop(Interface *IF, const IPv6Addr *Dest, uint ScopeId,
|
|
NeighborCacheEntry **ReturnNCE, ushort *ReturnConstrained);
|
|
|
|
extern IP_STATUS
|
|
FindRoute(Interface *IF, const IPv6Addr *Dest, uint ScopeId,
|
|
NeighborCacheEntry **ReturnNCE, ushort *ReturnConstrained);
|
|
|
|
extern void
|
|
RouteTableTimeout(void);
|
|
|
|
extern void
|
|
SitePrefixTimeout(void);
|
|
|
|
extern void
|
|
InvalidateRouter(NeighborCacheEntry *NCE);
|
|
|
|
extern int
|
|
UpdatePathMTU(Interface *IF, const IPv6Addr *Dest, uint MTU);
|
|
|
|
extern IP_STATUS
|
|
RedirectRouteCache(const IPv6Addr *Source, const IPv6Addr *Dest,
|
|
Interface *IF, NeighborCacheEntry *NCE);
|
|
|
|
extern void
|
|
MoveToFrontBCE(BindingCacheEntry *BCE);
|
|
|
|
extern BindingCacheEntry *
|
|
FindBindingCacheEntry(const IPv6Addr *HomeAddr);
|
|
|
|
extern BindingUpdateDisposition
|
|
CacheBindingUpdate(IPv6BindingUpdateOption UNALIGNED *BindingUpdate,
|
|
const IPv6Addr *CareOfAddr,
|
|
NetTableEntryOrInterface *NTEorIF,
|
|
const IPv6Addr *HomeAddr);
|
|
|
|
extern void
|
|
BindingCacheTimeout(void);
|
|
|
|
extern void
|
|
RouterAdvertSend(Interface *IF, const IPv6Addr *Source, const IPv6Addr *Dest);
|
|
|
|
extern void
|
|
RemoveRTE(RouteTableEntry **PrevRTE, RouteTableEntry *RTE);
|
|
|
|
extern void
|
|
InsertRTEAtFront(RouteTableEntry *RTE);
|
|
|
|
extern void
|
|
InsertRTEAtBack(RouteTableEntry *RTE);
|
|
|
|
extern IP_STATUS
|
|
GetBestRouteInfo(const IPv6Addr *Addr, ulong ScopeId, IP6RouteEntry *Ire);
|
|
|
|
typedef struct {
|
|
PIO_WORKITEM WorkItem;
|
|
PIRP RequestList;
|
|
} CompleteRtChangeContext;
|
|
|
|
typedef struct {
|
|
KIRQL OldIrql;
|
|
PIRP RequestList;
|
|
PIRP *LastRequest;
|
|
CompleteRtChangeContext *Context;
|
|
} CheckRtChangeContext;
|
|
|
|
__inline void
|
|
InitCheckRtChangeContext(CheckRtChangeContext *Context)
|
|
{
|
|
// Context->OldIrql must be initialized separately.
|
|
Context->RequestList = NULL;
|
|
Context->LastRequest = &Context->RequestList;
|
|
Context->Context = NULL;
|
|
}
|
|
|
|
extern void
|
|
CheckRtChangeNotifyRequests(
|
|
CheckRtChangeContext *Context,
|
|
PFILE_OBJECT FileObject,
|
|
RouteTableEntry *RTE);
|
|
|
|
extern void
|
|
CompleteRtChangeNotifyRequests(CheckRtChangeContext *Context);
|
|
|
|
#endif // ROUTE_INCLUDED
|