// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs) // // Copyright (c) 1998-2000 Microsoft Corporation // // This file is part of the Microsoft Research IPv6 Network Protocol Stack. // You should have received a copy of the Microsoft End-User License Agreement // for this software along with this release; see the file "license.txt". // If not, please see http://www.research.microsoft.com/msripv6/license.htm, // or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399. // // Abstract: // // Routing code external definitions for Internet Protocol Version 6. // #ifndef ROUTE_INCLUDED #define ROUTE_INCLUDED 1 #ifndef IPINFO_INCLUDED # include #endif typedef struct BindingCacheEntry BindingCacheEntry; typedef struct RouteTableEntry RouteTableEntry; typedef struct SitePrefixEntry SitePrefixEntry; extern void InitRouting(void); extern void UnloadRouting(void); // // Structure of a route cache entry. // // A route cache entry (RCE) primarily caches two computations: // next-hop determination and source address selection. // An RCE also caches other information related to the destination, // like path MTU. // // An RCE can also be created as a result of receiving an Redirect // ICMP message. // // There is at most one RCE per destination address / interface pair. // Our route cache corresponds to the destination cache // mentioned in RFC 1970's conceptual data structures, // with the addition of support for multi-homed nodes. // // The primary lookup key for RCEs is the destination address. // The current implementation just searches a list of all RCEs, // but a hash table or tree data structure would be preferable. // // Some nodes (like busy servers) might have many thousands of RCEs // but only tens of NCEs, because most destinations are reached // through only a few neighbor routers. Some nodes (like busy routers) // will have relatively few RCEs and hundreds of NCEs, because // forwarding does not use an RCE. 
//
// The three major components of an RCE are the destination address,
// NTE (indicates both the interface, and the best source address
// on that interface to use for this destination), and NCE
// (neighbor to which to send packets for this destination).
//
// Once an RCE is created, these three components are read-only
// and anyone who holds a reference for the RCE can rely on
// them not changing. The RCE holds references for the NTE and NCE.
// This allows code that holds an RCE to access the important
// fields without acquiring any locks. Fields like the path MTU
// can also be safely read without a lock.
//
// When an RCE becomes invalid, it is removed from the route cache
// but it is not deallocated until it has zero references.
// The route cache itself holds one reference on RCEs in the cache.
//
// Because an RCE caches the result of two computations, RCEs can
// become invalid (stale) for two reasons: the preferred source
// address should be recomputed, or the next-hop neighbor should be
// recomputed.
//
// Source addresses need to be recomputed or checked when the NTEs
// on the RCE's interface change state - for example a new address
// is created, a preferred address becomes deprecated, etc.
// In practice, these should be relatively infrequent situations.
//
// Next-hop determination needs to be redone in several situations:
// a neighbor is not reachable, a neighbor stops being a router,
// a route in the routing table is removed or added, etc.
// Again, these should be relatively infrequent situations.
//
// To avoid undue time & memory overheads (for example maintaining
// a linked list of all RCEs that point to an NCE and a linked list
// of all RCEs on a given interface, so that the right RCEs can
// be immediately found when something changes), we use a "lazy" approach
// based on a validation counter.
//
// There is a single global validation counter and when any state
// changes that might potentially invalidate an RCE, this counter
// is incremented. Each RCE has a snapshot of the counter that
// can be quickly checked to validate the RCE.
//
// If the RCE is invalid, then its contents (best source address,
// next hop neighbor) are recomputed. If they are still correct,
// then the RCE's validation counter snapshot is updated.
// Otherwise the RCE's contents are updated (if nobody is using the RCE)
// or a new RCE is created and the invalid RCE is removed from the cache.
// Because the important fields in an RCE are read-only,
// an RCE can only be updated in-place if it has no external references.
//
// For efficiency, some code may cache an RCE reference for a "long"
// time, for example in a connection control block. Before using
// the cached RCE, such code should check the invalidation counter
// to ensure that the RCE is still valid. The ValidateRCE function
// performs this check.
//
// Some RCEs are "constrained" (RCE_FLAG_CONSTRAINED). This means
// that they can only be found in RouteToDestination if the caller
// explicitly specifies an outgoing interface (RCE_FLAG_CONSTRAINED_IF)
// or scopeid (RCE_FLAG_CONSTRAINED_SCOPEID). Consider
// a multi-homed node which can reach a destination via two interfaces,
// one of which is preferred (has a longer-matching-prefix route)
// over the other. An RCE for reaching the destination via the non-preferred
// interface will be marked as "constrained", to prevent its use
// when RouteToDestination is called without a constraining NTEorIF.
//
// Because specifying an interface implicitly specifies a scopeid,
// RCEs with RCE_FLAG_CONSTRAINED_IF also have RCE_FLAG_CONSTRAINED_SCOPEID.
//
// For a given destination address, all or all but one RCE for that
// destination should be "constrained". Or put another way, at most one RCE
// should not be "constrained".
Or put another way, a destination address // sans scopeid can only have one preferred outgoing interface. // For a destination address / scopeid pair, all or all but one RCE // for that pair should be "interface constrained". // // The BCE field is non-NULL if this is a home address. // It does not hold a reference (Binding Cache Entries are not refcounted) // and it can only be non-NULL if the RCE is in the cache. // Access to the BCE field requires the route cache lock. // struct RouteCacheEntry { RouteCacheEntry *Next; // Next RCE in cache list. RouteCacheEntry *Prev; // Previous entry in cache list. long RefCnt; ushort Flags; // Peculiarities about this entry. ushort Type; // See below. ulong Valid; // Validation counter value. IPv6Addr Destination; // Where this route is to. struct NetTableEntry *NTE; // Preferred source address/interface. NeighborCacheEntry *NCE; // First-hop neighbor. uint LastError; // Time of last ICMP error (IPv6 ticks). uint PathMTU; // MTU of path to destination. uint PMTULastSet; // Time of last PMTU reduction. BindingCacheEntry *BCE; // If this is a home address. }; // // These flag bits indicate whether the IF or ScopeId arguments // to FindOrCreateRoute affected the choice of RCE. // NB: FindOrCreateRoute assumes that these are the only flag bits. // #define RCE_FLAG_CONSTRAINED_IF 0x1 #define RCE_FLAG_CONSTRAINED_SCOPEID 0x2 #define RCE_FLAG_CONSTRAINED 0x3 #define RCE_TYPE_COMPUTED 1 #define RCE_TYPE_REDIRECT 2 __inline void AddRefRCE(RouteCacheEntry *RCE) { InterlockedIncrement(&RCE->RefCnt); } extern ulong RouteCacheValidationCounter; __inline void InvalidateRouteCache(void) { InterlockedIncrement(&RouteCacheValidationCounter); } __inline void InvalidateRCE(RouteCacheEntry *RCE) { InterlockedDecrement(&RCE->Valid); } // // Structure of an entry in the route table. // // SitePrefixLength and PreferredLifetime // are only used when generating a Prefix Information Option // based on the route. 
// // If the route is published, then it does not disappear // even when the lifetime goes to zero. It is still present // for use in generating Router Advertisements. // But it doesn't get used for routing. // Similarly, system routes (RTE_TYPE_SYSTEM) are kept // in the route table even when their lifetime is zero. // This allows a loopback route to be allocated for an NTE/AAE // up front, but not be enabled until the address is valid. // struct RouteTableEntry { struct RouteTableEntry *Next; // Next entry on prefix list. Interface *IF; // Relevant interface. NeighborCacheEntry *NCE; // Next-hop neighbor (may be NULL). IPv6Addr Prefix; // Prefix (note not all bits are valid!). uint PrefixLength; // Number of bits in above to use as prefix. uint SitePrefixLength; // If non-zero, indicates a site subprefix. uint ValidLifetime; // In ticks. uint PreferredLifetime; // In ticks. uint Preference; // Smaller is better. ushort Flags; ushort Type; }; // // The Type field indicates where the route came from. // These are RFC 2465 ipv6RouteProtocol values. // Routing protocols are free to define new values. // Only these three values are built-in. // ntddip6.h also defines these values, as well as others. // #define RTE_TYPE_SYSTEM 2 #define RTE_TYPE_MANUAL 3 #define RTE_TYPE_AUTOCONF 4 __inline int IsValidRouteTableType(uint Type) { return Type < (1 << 16); } // // If the NCE is NULL, then the RTE specifies an on-link prefix. // Otherwise the RTE specifies a route to the neighbor. // As you would expect, generally the neighbor is on the interface. // Loopback routes are an exception. // // The PUBLISH bit indicates that the RTE can be visible // to RouterAdvertSend. That is, it is a "public" route. // The IMMORTAL bit indicates that the RTE's lifetime // does not age or countdown. It is useful in PUBLISHed RTEs, // where the RTE's lifetime affects the lifetime in RAs. // In non-PUBLISHed RTEs it is equivalent to an infinite lifetime. 
// #define RTE_FLAG_PUBLISH 0x00000001 // Used to create RAs. #define RTE_FLAG_IMMORTAL 0x00000002 // Lifetime does not decrease. // // These values are also defined in ntddip6.h. // Zero preference is reserved for administrative configuration. // Smaller is more preferred than larger. // We call these numbers preferences instead of metrics // in an attempt to prevent confusion with the metrics // employed by routing protocols. Routing protocol metrics // need to be mapped into our routing table preferences. // The largest preference value is 2^31-1, so that // we can add a route preference and an interface preference // without overflow. // #define ROUTE_PREF_LOW (16*16*16) #define ROUTE_PREF_MEDIUM (16*16) #define ROUTE_PREF_HIGH 16 #define ROUTE_PREF_ON_LINK 8 #define ROUTE_PREF_LOOPBACK 4 #define ROUTE_PREF_HIGHEST 0 // // Extract a route preference value // from the Flags field in a Router Advertisement. // __inline int ExtractRoutePreference(uchar Flags) { switch (Flags & 0x18) { case 0x08: return ROUTE_PREF_HIGH; case 0x00: return ROUTE_PREF_MEDIUM; case 0x18: return ROUTE_PREF_LOW; default: return 0; // Invalid. } } // // Encode a route preference value // for use in a Flags field in a Router Advertisement. // __inline uchar EncodeRoutePreference(uint Preference) { if (Preference <= ROUTE_PREF_HIGH) return 0x08; else if (Preference <= ROUTE_PREF_MEDIUM) return 0x00; else return 0x18; } __inline int IsValidPreference(uint Preference) { return Preference < (1 << 31); } __inline int IsOnLinkRTE(RouteTableEntry *RTE) { return (RTE->NCE == NULL); } // // Binding cache structure. Holds references to care-of RCE's. // struct BindingCacheEntry { struct BindingCacheEntry *Next; struct BindingCacheEntry *Prev; RouteCacheEntry *CareOfRCE; IPv6Addr HomeAddr; uint BindingLifetime; // Remaining lifetime (IPv6 ticks). ushort BindingSeqNumber; }; // // Site prefix entry. // Used for filtering site-local addresses returned by DNS. 
// struct SitePrefixEntry { struct SitePrefixEntry *Next; Interface *IF; uint ValidLifetime; // In ticks. uint SitePrefixLength; IPv6Addr Prefix; }; // // Global data structures. // // // RouteCacheLock protects the route cache and the binding cache. // RouteTableLock protects the route table and the site-prefix table. // // Lock acquisition order is: // RouteCacheLock before interface locks // interface locks before RouteTableLock // IoCancelSpinLock before RouteTableLock // RouteTableLock before neighbor cache locks // extern KSPIN_LOCK RouteCacheLock; extern KSPIN_LOCK RouteTableLock; // // The Route Cache contains RCEs. RCEs with reference count of one // can still be cached, but they may also be reclaimed. // (The lone reference is from the cache itself.) // // The current implementation is a simple circular linked-list of RCEs. // extern struct RouteCache { uint Limit; uint Count; RouteCacheEntry *First; RouteCacheEntry *Last; } RouteCache; #define SentinelRCE ((RouteCacheEntry *)&RouteCache.First) extern struct RouteTable { RouteTableEntry *First; RouteTableEntry **Last; } RouteTable; extern struct BindingCache { uint Limit; uint Count; BindingCacheEntry *First; BindingCacheEntry *Last; } BindingCache; #define SentinelBCE ((BindingCacheEntry *)&BindingCache.First) extern SitePrefixEntry *SitePrefixTable; // // Set to TRUE when the routing table changes // (for example adding/removing/changing published routes) // so that it's a good idea to send Router Advertisements // very promptly. // extern int ForceRouterAdvertisements; // // Contains a queue of IRPs that represent // route notification requests. // extern LIST_ENTRY RouteNotifyQueue; // // Exported function declarations. 
// int IsLoopbackRCE(RouteCacheEntry *RCE); int IsDisconnectedAndNotLoopbackRCE(RouteCacheEntry *RCE); extern IPAddr GetV4Destination(RouteCacheEntry *RCE); uint GetPathMTUFromRCE(RouteCacheEntry *RCE); uint GetEffectivePathMTUFromRCE(RouteCacheEntry *RCE); void ConfirmForwardReachability(RouteCacheEntry *RCE); void ForwardReachabilityInDoubt(RouteCacheEntry *RCE); uint GetInitialRTTFromRCE(RouteCacheEntry *RCE); extern void ReleaseRCE(RouteCacheEntry *RCE); extern RouteCacheEntry * ValidateRCE(RouteCacheEntry *RCE); #define RTD_FLAG_STRICT 0 // Must use specified IF. #define RTD_FLAG_NORMAL 1 // Must use specified IF unless it forwards. #define RTD_FLAG_LOOSE 2 // Only use IF to determine/check ScopeId. extern IP_STATUS RouteToDestination(const IPv6Addr *Destination, uint ScopeId, NetTableEntryOrInterface *NTEorIF, uint Flags, RouteCacheEntry **RCE); extern void FlushRouteCache(Interface *IF, const IPv6Addr *Addr); extern NetTableEntry * FindNetworkWithAddress(const IPv6Addr *Source, uint ScopeId); extern NTSTATUS RouteTableUpdate(PFILE_OBJECT FileObject, Interface *IF, NeighborCacheEntry *NCE, const IPv6Addr *Prefix, uint PrefixLength, uint SitePrefixLength, uint ValidLifetime, uint PreferredLifetime, uint Pref, uint Type, int Publish, int Immortal); extern void SitePrefixUpdate(Interface *IF, const IPv6Addr *Prefix, uint SitePrefixLength, uint ValidLifetime); extern uint SitePrefixMatch(const IPv6Addr *Destination); extern void RouteTableRemove(Interface *IF); extern void RouteTableResetAutoConfig(Interface *IF, uint MaxLifetime); extern void RouteTableReset(void); extern IP_STATUS FindOrCreateRoute(const IPv6Addr *Dest, uint ScopeId, Interface *IF, RouteCacheEntry **ReturnRCE); extern IP_STATUS FindNextHop(Interface *IF, const IPv6Addr *Dest, uint ScopeId, NeighborCacheEntry **ReturnNCE, ushort *ReturnConstrained); extern IP_STATUS FindRoute(Interface *IF, const IPv6Addr *Dest, uint ScopeId, NeighborCacheEntry **ReturnNCE, ushort *ReturnConstrained); extern 
void RouteTableTimeout(void); extern void SitePrefixTimeout(void); extern void InvalidateRouter(NeighborCacheEntry *NCE); extern int UpdatePathMTU(Interface *IF, const IPv6Addr *Dest, uint MTU); extern IP_STATUS RedirectRouteCache(const IPv6Addr *Source, const IPv6Addr *Dest, Interface *IF, NeighborCacheEntry *NCE); extern void MoveToFrontBCE(BindingCacheEntry *BCE); extern BindingCacheEntry * FindBindingCacheEntry(const IPv6Addr *HomeAddr); extern BindingUpdateDisposition CacheBindingUpdate(IPv6BindingUpdateOption UNALIGNED *BindingUpdate, const IPv6Addr *CareOfAddr, NetTableEntryOrInterface *NTEorIF, const IPv6Addr *HomeAddr); extern void BindingCacheTimeout(void); extern void RouterAdvertSend(Interface *IF, const IPv6Addr *Source, const IPv6Addr *Dest); extern void RemoveRTE(RouteTableEntry **PrevRTE, RouteTableEntry *RTE); extern void InsertRTEAtFront(RouteTableEntry *RTE); extern void InsertRTEAtBack(RouteTableEntry *RTE); extern IP_STATUS GetBestRouteInfo(const IPv6Addr *Addr, ulong ScopeId, IP6RouteEntry *Ire); typedef struct { PIO_WORKITEM WorkItem; PIRP RequestList; } CompleteRtChangeContext; typedef struct { KIRQL OldIrql; PIRP RequestList; PIRP *LastRequest; CompleteRtChangeContext *Context; } CheckRtChangeContext; __inline void InitCheckRtChangeContext(CheckRtChangeContext *Context) { // Context->OldIrql must be initialized separately. Context->RequestList = NULL; Context->LastRequest = &Context->RequestList; Context->Context = NULL; } extern void CheckRtChangeNotifyRequests( CheckRtChangeContext *Context, PFILE_OBJECT FileObject, RouteTableEntry *RTE); extern void CompleteRtChangeNotifyRequests(CheckRtChangeContext *Context); #endif // ROUTE_INCLUDED