windows-nt/Source/XPSP1/NT/net/tcpip/tpipv6/tcpip6/inc/route.h

// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
//
// Copyright (c) 1998-2000 Microsoft Corporation
//
// This file is part of the Microsoft Research IPv6 Network Protocol Stack.
// You should have received a copy of the Microsoft End-User License Agreement
// for this software along with this release; see the file "license.txt".
// If not, please see http://www.research.microsoft.com/msripv6/license.htm,
// or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
//
// Abstract:
//
// Routing code external definitions for Internet Protocol Version 6.
//


#ifndef ROUTE_INCLUDED
#define ROUTE_INCLUDED 1

#ifndef IPINFO_INCLUDED
# include <ipinfo.h>
#endif

//
// Forward declarations. The structures themselves are defined
// further down in this header.
//
typedef struct BindingCacheEntry BindingCacheEntry;
typedef struct RouteTableEntry RouteTableEntry;
typedef struct SitePrefixEntry SitePrefixEntry;

// NOTE(review): presumably one-time setup of the routing module - confirm.
extern void InitRouting(void);

// NOTE(review): presumably the teardown counterpart of InitRouting - confirm.
extern void UnloadRouting(void);

//
// Structure of a route cache entry.
//
// A route cache entry (RCE) primarily caches two computations:
// next-hop determination and source address selection.
// An RCE also caches other information related to the destination,
// like path MTU.
//
// An RCE can also be created as a result of receiving a Redirect
// ICMP message.
//
// There is at most one RCE per destination address / interface pair.
// Our route cache corresponds to the destination cache
// mentioned in RFC 1970's conceptual data structures,
// with the addition of support for multi-homed nodes.
//
// The primary lookup key for RCEs is the destination address.
// The current implementation just searches a list of all RCEs,
// but a hash table or tree data structure would be preferable.
//
// Some nodes (like busy servers) might have many thousands of RCEs
// but only tens of NCEs, because most destinations are reached
// through only a few neighbor routers. Some nodes (like busy routers)
// will have relatively few RCEs and hundreds of NCEs, because
// forwarding does not use an RCE.
//
// The three major components of an RCE are the destination address,
// NTE (indicates both the interface, and the best source address
// on that interface to use for this destination), and NCE
// (neighbor to which to send packets for this destination).
//
// Once an RCE is created, these three components are read-only
// and anyone who holds a reference for the RCE can rely on
// them not changing. The RCE holds references for the NTE and NCE.
// This allows code that holds an RCE to access the important
// fields without acquiring any locks. Fields like the path MTU
// can also be safely read without a lock.
//
// When an RCE becomes invalid, it is removed from the route cache
// but it is not deallocated until it has zero references.
// The route cache itself holds one reference on RCEs in the cache.
//
// Because an RCE caches the result of two computations, RCEs can
// become invalid (stale) for two reasons: the preferred source
// address should be recomputed, or the next-hop neighbor should be
// recomputed.
//
// Source addresses need to be recomputed or checked when the NTEs
// on the RCE's interface change state - for example a new address
// is created, a preferred address becomes deprecated, etc.
// In practice, these should be relatively infrequent situations.
//
// Next-hop determination needs to be redone in several situations:
// a neighbor is not reachable, a neighbor stops being a router,
// a route in the routing table is removed or added, etc.
// Again, these should be relatively infrequent situations.
//
// To avoid undue time & memory overheads (for example maintaining
// a linked list of all RCEs that point to an NCE and a linked list
// of all RCEs on a given interface, so that the right RCEs can
// be immediately found when something changes), we use a "lazy" approach
// based on a validation counter.
//
// There is a single global validation counter and when any state
// changes that might potentially invalidate an RCE, this counter
// is incremented. Each RCE has a snapshot of the counter that
// can be quickly checked to validate the RCE.
//
// If the RCE is invalid, then its contents (best source address,
// next hop neighbor) are recomputed. If they are still correct,
// then the RCE's validation counter snapshot is updated.
// Otherwise the RCE's contents are updated (if nobody is using the RCE)
// or a new RCE is created and the invalid RCE is removed from the cache.
// Because the important fields in an RCE are read-only,
// an RCE can only be updated in-place if it has no external references.
//
// For efficiency, some code may cache an RCE reference for a "long"
// time, for example in a connection control block. Before using
// the cached RCE, such code should check the invalidation counter
// to ensure that the RCE is still valid. The ValidateRCE function
// performs this check.
//
// Some RCEs are "constrained" (RCE_FLAG_CONSTRAINED). This means
// that they can only be found in RouteToDestination if the caller
// explicitly specifies an outgoing interface (RCE_FLAG_CONSTRAINED_IF)
// or scopeid (RCE_FLAG_CONSTRAINED_SCOPEID). Consider
// a multi-homed node which can reach a destination via two interfaces,
// one of which is preferred (has a longer-matching-prefix route)
// over the other. An RCE for reaching the destination via the non-preferred
// interface will be marked as "constrained", to prevent its use
// when RouteToDestination is called without a constraining NTEorIF.
//
// Because specifying an interface implicitly specifies a scopeid,
// RCEs with RCE_FLAG_CONSTRAINED_IF also have RCE_FLAG_CONSTRAINED_SCOPEID.
//
// For a given destination address, all or all but one RCE for that
// destination should be "constrained". Or put another way, at most one RCE
// should not be "constrained". Or put another way, a destination address
// sans scopeid can only have one preferred outgoing interface.
// For a destination address / scopeid pair, all or all but one RCE
// for that pair should be "interface constrained".
//
// The BCE field is non-NULL if this is a home address.
// It does not hold a reference (Binding Cache Entries are not refcounted)
// and it can only be non-NULL if the RCE is in the cache.
// Access to the BCE field requires the route cache lock.
//
struct RouteCacheEntry {
    //
    // Next and Prev must remain the first two fields, in this order:
    // SentinelRCE casts the address of RouteCache.First to an RCE pointer,
    // so these links are overlaid on RouteCache.First / RouteCache.Last.
    //
    RouteCacheEntry *Next;           // Next RCE in cache list.
    RouteCacheEntry *Prev;           // Previous entry in cache list.
    long RefCnt;                     // Reference count (see AddRefRCE).
    ushort Flags;                    // Peculiarities about this entry.
                                     // (RCE_FLAG_* bits.)
    ushort Type;                     // See below.
                                     // (RCE_TYPE_* values.)
    ulong Valid;                     // Validation counter value.
                                     // (Snapshot of the global counter.)
    IPv6Addr Destination;            // Where this route is to.
    struct NetTableEntry *NTE;       // Preferred source address/interface.
                                     // (The RCE holds a reference.)
    NeighborCacheEntry *NCE;         // First-hop neighbor.
                                     // (The RCE holds a reference.)
    uint LastError;                  // Time of last ICMP error (IPv6 ticks).
    uint PathMTU;                    // MTU of path to destination.
    uint PMTULastSet;                // Time of last PMTU reduction.
    BindingCacheEntry *BCE;          // If this is a home address.
                                     // (Not a counted reference; access
                                     // requires the route cache lock.)
};

//
// These flag bits indicate whether the IF or ScopeId arguments
// to FindOrCreateRoute affected the choice of RCE.
// NB: FindOrCreateRoute assumes that these are the only flag bits.
//
#define RCE_FLAG_CONSTRAINED_IF         0x1     // Found only when an IF is given.
#define RCE_FLAG_CONSTRAINED_SCOPEID    0x2     // Found only when a ScopeId is given.
#define RCE_FLAG_CONSTRAINED            0x3     // Mask: both bits above.

//
// Values for the RCE Type field.
// NOTE(review): REDIRECT presumably marks RCEs created from an ICMP
// Redirect, COMPUTED those produced by normal next-hop determination -
// confirm against the implementation.
//
#define RCE_TYPE_COMPUTED 1
#define RCE_TYPE_REDIRECT 2

//
// Take one more reference on an RCE.
// The count is bumped with an interlocked increment;
// the resulting value is not needed and is discarded.
//
__inline void
AddRefRCE(RouteCacheEntry *RCE)
{
    long *RefCount = &RCE->RefCnt;

    (void) InterlockedIncrement(RefCount);
}

//
// The single global validation counter. Each RCE keeps a snapshot
// of this value in its Valid field.
//
extern ulong RouteCacheValidationCounter;

//
// Invalidate every RCE at once by advancing the global
// validation counter, so that existing snapshots no longer match.
//
// The counter is declared ulong, whereas InterlockedIncrement
// operates on a signed long; the cast makes the pointer
// conversion explicit instead of relying on an implicit
// conversion between incompatible pointer types.
//
__inline void
InvalidateRouteCache(void)
{
    InterlockedIncrement((long *)&RouteCacheValidationCounter);
}

//
// Invalidate a single RCE by decrementing its validation snapshot.
// NOTE(review): the intent is presumably that the snapshot stops
// matching RouteCacheValidationCounter, forcing revalidation the
// next time the RCE is checked - confirm against ValidateRCE.
//
// Valid is declared ulong, whereas InterlockedDecrement operates
// on a signed long; the cast makes the pointer conversion explicit
// instead of relying on an implicit conversion between incompatible
// pointer types.
//
__inline void
InvalidateRCE(RouteCacheEntry *RCE)
{
    InterlockedDecrement((long *)&RCE->Valid);
}

//
// Structure of an entry in the route table.
//
// SitePrefixLength and PreferredLifetime
// are only used when generating a Prefix Information Option
// based on the route.
//
// If the route is published, then it does not disappear
// even when the lifetime goes to zero. It is still present
// for use in generating Router Advertisements.
// But it doesn't get used for routing.
// Similarly, system routes (RTE_TYPE_SYSTEM) are kept
// in the route table even when their lifetime is zero.
// This allows a loopback route to be allocated for an NTE/AAE
// up front, but not be enabled until the address is valid.
//
struct RouteTableEntry {
    struct RouteTableEntry *Next;  // Next entry on prefix list.
    Interface *IF;                 // Relevant interface.
    NeighborCacheEntry *NCE;       // Next-hop neighbor (may be NULL).
                                   // NULL means an on-link prefix
                                   // (see IsOnLinkRTE).
    IPv6Addr Prefix;               // Prefix (note not all bits are valid!).
    uint PrefixLength;             // Number of bits in above to use as prefix.
    uint SitePrefixLength;         // If non-zero, indicates a site subprefix.
    uint ValidLifetime;            // In ticks.
    uint PreferredLifetime;        // In ticks.
    uint Preference;               // Smaller is better.
    ushort Flags;                  // RTE_FLAG_* bits.
    ushort Type;                   // Where the route came from (RTE_TYPE_*).
};

//
// The Type field indicates where the route came from.
// These are RFC 2465 ipv6RouteProtocol values.
// Routing protocols are free to define new values.
// Only these three values are built-in.
// ntddip6.h also defines these values, as well as others.
//
#define RTE_TYPE_SYSTEM         2       // RFC 2465 ipv6RouteProtocol local(2).
#define RTE_TYPE_MANUAL         3       // RFC 2465 ipv6RouteProtocol netmgmt(3).
#define RTE_TYPE_AUTOCONF       4       // RFC 2465 ipv6RouteProtocol ndisc(4).

//
// A route type is acceptable when it is representable in 16 bits,
// which is the width of the ushort Type field of an RTE.
// Returns non-zero for an acceptable type, zero otherwise.
//
__inline int
IsValidRouteTableType(uint Type)
{
    return Type <= 0xFFFF;
}

//
// If the NCE is NULL, then the RTE specifies an on-link prefix.
// Otherwise the RTE specifies a route to the neighbor.
// As you would expect, generally the neighbor is on the interface.
// Loopback routes are an exception.
//
// The PUBLISH bit indicates that the RTE can be visible
// to RouterAdvertSend. That is, it is a "public" route.
// The IMMORTAL bit indicates that the RTE's lifetime
// does not age or count down. It is useful in PUBLISHed RTEs,
// where the RTE's lifetime affects the lifetime in RAs.
// In non-PUBLISHed RTEs it is equivalent to an infinite lifetime.
//
// Bit values for the Flags field of a RouteTableEntry.
#define RTE_FLAG_PUBLISH        0x00000001      // Used to create RAs.
#define RTE_FLAG_IMMORTAL       0x00000002      // Lifetime does not decrease.

//
// These values are also defined in ntddip6.h.
// Zero preference is reserved for administrative configuration.
// Smaller is more preferred than larger.
// We call these numbers preferences instead of metrics
// in an attempt to prevent confusion with the metrics
// employed by routing protocols. Routing protocol metrics
// need to be mapped into our routing table preferences.
// The largest preference value is 2^31-1, so that
// we can add a route preference and an interface preference
// without overflow.
//
#define ROUTE_PREF_LOW          (16*16*16)      // 4096.
#define ROUTE_PREF_MEDIUM       (16*16)         // 256.
#define ROUTE_PREF_HIGH         16
#define ROUTE_PREF_ON_LINK      8
#define ROUTE_PREF_LOOPBACK     4
#define ROUTE_PREF_HIGHEST      0               // Reserved for administrative use.

//
// Extract a route preference value
// from the Flags field in a Router Advertisement.
//
//
// Decode the two preference bits (mask 0x18) of the Flags byte:
//      0x00 -> ROUTE_PREF_MEDIUM
//      0x08 -> ROUTE_PREF_HIGH
//      0x18 -> ROUTE_PREF_LOW
// The remaining encoding (0x10) is invalid and yields zero.
//
__inline int
ExtractRoutePreference(uchar Flags)
{
    int PrfBits = Flags & 0x18;

    if (PrfBits == 0x00)
        return ROUTE_PREF_MEDIUM;

    if (PrfBits == 0x08)
        return ROUTE_PREF_HIGH;

    if (PrfBits == 0x18)
        return ROUTE_PREF_LOW;

    return 0;       // Invalid.
}

//
// Encode a route preference value
// for use in a Flags field in a Router Advertisement.
//
//
// Map a routing table preference onto the two preference bits
// (mask 0x18) of a Router Advertisement Flags field:
//      Preference <= ROUTE_PREF_HIGH    -> 0x08
//      Preference <= ROUTE_PREF_MEDIUM  -> 0x00
//      anything larger                  -> 0x18
//
__inline uchar
EncodeRoutePreference(uint Preference)
{
    if (Preference > ROUTE_PREF_MEDIUM)
        return 0x18;

    return (Preference > ROUTE_PREF_HIGH) ? 0x00 : 0x08;
}

//
// A preference is valid when it is at most 2^31-1, so that
// a route preference and an interface preference can be added
// without overflow.
// Returns non-zero for a valid preference, zero otherwise.
//
// The bound is built from an unsigned constant: shifting the
// signed constant 1 left by 31 bits overflows a 32-bit int,
// which is undefined behavior in C.
//
__inline int
IsValidPreference(uint Preference)
{
    return Preference < (1U << 31);
}

//
// An RTE without a next-hop neighbor describes an on-link prefix.
// Returns 1 for an on-link RTE and 0 for a route via a neighbor.
//
__inline int
IsOnLinkRTE(RouteTableEntry *RTE)
{
    if (RTE->NCE != NULL)
        return 0;       // Route to a neighbor.

    return 1;           // On-link prefix.
}


//
// Binding cache structure.  Holds references to care-of RCE's.
//
struct BindingCacheEntry {
    //
    // Next and Prev must remain the first two fields, in this order:
    // SentinelBCE casts the address of BindingCache.First to a BCE pointer,
    // so these links are overlaid on BindingCache.First / BindingCache.Last.
    //
    struct BindingCacheEntry *Next;
    struct BindingCacheEntry *Prev;
    RouteCacheEntry *CareOfRCE;      // Care-of RCE (the BCE holds a reference).
    IPv6Addr HomeAddr;               // Home address this binding is for.
    uint BindingLifetime;            // Remaining lifetime (IPv6 ticks).
    ushort BindingSeqNumber;         // NOTE(review): presumably the sequence
                                     // number of the latest accepted Binding
                                     // Update - confirm.
};

//
// Site prefix entry.
// Used for filtering site-local addresses returned by DNS.
//
struct SitePrefixEntry {
    struct SitePrefixEntry *Next;  // Next entry in the list.
    Interface *IF;                 // Interface the prefix is associated with.
    uint ValidLifetime;            // In ticks.
    uint SitePrefixLength;         // NOTE(review): presumably the number of
                                   // significant bits in Prefix - confirm.
    IPv6Addr Prefix;
};

//
// Global data structures.
//

//
// RouteCacheLock protects the route cache and the binding cache.
// RouteTableLock protects the route table and the site-prefix table.
//
// Lock acquisition order is:
//      RouteCacheLock before interface locks
//      interface locks before RouteTableLock
//      IoCancelSpinLock before RouteTableLock
//      RouteTableLock before neighbor cache locks
//
extern KSPIN_LOCK RouteCacheLock;   // Route cache and binding cache.
extern KSPIN_LOCK RouteTableLock;   // Route table and site-prefix table.

//
// The Route Cache contains RCEs. RCEs with reference count of one
// can still be cached, but they may also be reclaimed.
// (The lone reference is from the cache itself.)
//
// The current implementation is a simple circular linked-list of RCEs.
//
extern struct RouteCache {
    uint Limit;
    uint Count;
    //
    // First and Last must stay adjacent and in this order.
    // They double as the Next/Prev links of the list sentinel.
    //
    RouteCacheEntry *First;
    RouteCacheEntry *Last;
} RouteCache;
//
// The list sentinel is not a real RCE. It is the First/Last pair above
// viewed through an RCE pointer, which works because Next and Prev are
// the first two fields of RouteCacheEntry. Only the Next and Prev fields
// of SentinelRCE may be accessed.
//
#define SentinelRCE     ((RouteCacheEntry *)&RouteCache.First)

extern struct RouteTable {
    RouteTableEntry *First;     // First RTE in the table.
    RouteTableEntry **Last;     // NOTE(review): presumably points at the
                                // final Next link (or at First when the
                                // table is empty), for appending - confirm.
} RouteTable;

extern struct BindingCache {
    uint Limit;
    uint Count;
    //
    // First and Last must stay adjacent and in this order.
    // They double as the Next/Prev links of the list sentinel.
    //
    BindingCacheEntry *First;
    BindingCacheEntry *Last;
} BindingCache;
//
// The list sentinel is not a real BCE. It is the First/Last pair above
// viewed through a BCE pointer, which works because Next and Prev are
// the first two fields of BindingCacheEntry. Only the Next and Prev
// fields of SentinelBCE may be accessed.
//
#define SentinelBCE     ((BindingCacheEntry *)&BindingCache.First)

//
// The site-prefix table, protected by RouteTableLock.
// NOTE(review): presumably the head of a list linked through
// SitePrefixEntry Next pointers - confirm.
//
extern SitePrefixEntry *SitePrefixTable;

//
// Set to TRUE when the routing table changes
// (for example adding/removing/changing published routes)
// so that it's a good idea to send Router Advertisements
// very promptly.
//
extern int ForceRouterAdvertisements;

//
// Contains a queue of IRPs that represent
// route notification requests.
//
extern LIST_ENTRY RouteNotifyQueue;

//
// Exported function declarations.
//

//
// Predicates and accessors that take an RCE.
// Their definitions are not in this header; the notes below
// are reviewer assumptions based on the names only.
//

// NOTE(review): presumably non-zero when the RCE routes to loopback - confirm.
int
IsLoopbackRCE(RouteCacheEntry *RCE);

// NOTE(review): exact meaning of "disconnected" is defined by the
// implementation - confirm.
int
IsDisconnectedAndNotLoopbackRCE(RouteCacheEntry *RCE);

// NOTE(review): presumably yields an IPv4 address associated with
// the RCE's destination - confirm.
extern IPAddr
GetV4Destination(RouteCacheEntry *RCE);

// NOTE(review): presumably derived from the RCE's PathMTU field - confirm.
uint
GetPathMTUFromRCE(RouteCacheEntry *RCE);

// NOTE(review): how "effective" differs from the plain path MTU
// is defined by the implementation - confirm.
uint
GetEffectivePathMTUFromRCE(RouteCacheEntry *RCE);

// NOTE(review): the next two presumably feed reachability hints
// to the RCE's neighbor - confirm.
void
ConfirmForwardReachability(RouteCacheEntry *RCE);

void
ForwardReachabilityInDoubt(RouteCacheEntry *RCE);

uint
GetInitialRTTFromRCE(RouteCacheEntry *RCE);


//
// NOTE(review): presumably drops a reference on the RCE (the
// counterpart of AddRefRCE); an RCE is not deallocated until
// it has zero references - confirm.
//
extern void
ReleaseRCE(RouteCacheEntry *RCE);

//
// Checks a cached RCE against the invalidation counter before use.
// NOTE(review): presumably returns either the same RCE or a
// replacement; confirm who owns the references on both.
//
extern RouteCacheEntry *
ValidateRCE(RouteCacheEntry *RCE);

//
// NOTE(review): presumably the values accepted by the Flags
// argument of RouteToDestination - confirm.
//
#define RTD_FLAG_STRICT 0       // Must use specified IF.
#define RTD_FLAG_NORMAL 1       // Must use specified IF unless it forwards.
#define RTD_FLAG_LOOSE  2       // Only use IF to determine/check ScopeId.

//
// Look up a route for Destination, handing the RCE back through *RCE.
// "Constrained" RCEs can only be found here when the caller supplies
// an outgoing interface or scope id.
//
extern IP_STATUS
RouteToDestination(const IPv6Addr *Destination, uint ScopeId,
                   NetTableEntryOrInterface *NTEorIF, uint Flags,
                   RouteCacheEntry **RCE);

//
// The definitions of the functions below are not in this header.
// Notes marked NOTE(review) are assumptions to be confirmed
// against the implementation.
//

extern void
FlushRouteCache(Interface *IF, const IPv6Addr *Addr);

extern NetTableEntry *
FindNetworkWithAddress(const IPv6Addr *Source, uint ScopeId);

//
// NOTE(review): Pref and Type presumably take ROUTE_PREF_* style
// preferences and RTE_TYPE_* values (see IsValidPreference and
// IsValidRouteTableType); Publish and Immortal presumably select
// RTE_FLAG_PUBLISH and RTE_FLAG_IMMORTAL - confirm.
//
extern NTSTATUS
RouteTableUpdate(PFILE_OBJECT FileObject,
                 Interface *IF, NeighborCacheEntry *NCE,
                 const IPv6Addr *Prefix, uint PrefixLength,
                 uint SitePrefixLength,
                 uint ValidLifetime, uint PreferredLifetime,
                 uint Pref, uint Type, int Publish, int Immortal);

extern void
SitePrefixUpdate(Interface *IF,
                 const IPv6Addr *Prefix, uint SitePrefixLength,
                 uint ValidLifetime);

extern uint
SitePrefixMatch(const IPv6Addr *Destination);

extern void
RouteTableRemove(Interface *IF);

extern void
RouteTableResetAutoConfig(Interface *IF, uint MaxLifetime);

extern void
RouteTableReset(void);

//
// NOTE(review): the RCE_FLAG_CONSTRAINED_* bits record whether the
// IF or ScopeId arguments of FindOrCreateRoute affected the choice
// of RCE; the ReturnConstrained outputs of FindNextHop and FindRoute
// presumably carry the same bits - confirm.
//
extern IP_STATUS
FindOrCreateRoute(const IPv6Addr *Dest, uint ScopeId,
                  Interface *IF, RouteCacheEntry **ReturnRCE);

extern IP_STATUS
FindNextHop(Interface *IF, const IPv6Addr *Dest, uint ScopeId,
            NeighborCacheEntry **ReturnNCE, ushort *ReturnConstrained);

extern IP_STATUS
FindRoute(Interface *IF, const IPv6Addr *Dest, uint ScopeId,
          NeighborCacheEntry **ReturnNCE, ushort *ReturnConstrained);

// NOTE(review): the *Timeout functions are presumably periodic
// lifetime-aging entry points - confirm.
extern void
RouteTableTimeout(void);

extern void
SitePrefixTimeout(void);

extern void
InvalidateRouter(NeighborCacheEntry *NCE);

extern int
UpdatePathMTU(Interface *IF, const IPv6Addr *Dest, uint MTU);

// NOTE(review): presumably the handler behind RCE_TYPE_REDIRECT
// entries - confirm.
extern IP_STATUS
RedirectRouteCache(const IPv6Addr *Source, const IPv6Addr *Dest,
                   Interface *IF, NeighborCacheEntry *NCE);

//
// Binding cache operations.
// The binding cache is protected by RouteCacheLock.
//
extern void
MoveToFrontBCE(BindingCacheEntry *BCE);

extern BindingCacheEntry *
FindBindingCacheEntry(const IPv6Addr *HomeAddr);

extern BindingUpdateDisposition
CacheBindingUpdate(IPv6BindingUpdateOption UNALIGNED *BindingUpdate,
                   const IPv6Addr *CareOfAddr,
                   NetTableEntryOrInterface *NTEorIF,
                   const IPv6Addr *HomeAddr);

extern void
BindingCacheTimeout(void);

// Published routes (RTE_FLAG_PUBLISH) are visible to RouterAdvertSend.
extern void
RouterAdvertSend(Interface *IF, const IPv6Addr *Source, const IPv6Addr *Dest);

//
// Route table list manipulation.
// NOTE(review): PrevRTE presumably points at the link that
// currently refers to RTE - confirm.
//
extern void
RemoveRTE(RouteTableEntry **PrevRTE, RouteTableEntry *RTE);

extern void
InsertRTEAtFront(RouteTableEntry *RTE);

extern void
InsertRTEAtBack(RouteTableEntry *RTE);

extern IP_STATUS
GetBestRouteInfo(const IPv6Addr *Addr, ulong ScopeId, IP6RouteEntry *Ire);

//
// NOTE(review): presumably carries a list of IRPs to a work item
// that completes them - confirm.
//
typedef struct {
    PIO_WORKITEM WorkItem;
    PIRP RequestList;
} CompleteRtChangeContext;

//
// State accumulated while checking route change notification requests.
// Set up with InitCheckRtChangeContext, which leaves OldIrql
// for the caller to fill in.
//
typedef struct {
    KIRQL OldIrql;                      // Must be initialized separately.
    PIRP RequestList;                   // Starts out NULL (empty).
    PIRP *LastRequest;                  // Starts out pointing at RequestList.
                                        // NOTE(review): presumably the tail
                                        // link used for appending - confirm.
    CompleteRtChangeContext *Context;   // Starts out NULL.
} CheckRtChangeContext;

//
// Prepare a CheckRtChangeContext for use: an empty request list,
// a LastRequest pointer that refers to that list's head,
// and no completion context.
//
__inline void
InitCheckRtChangeContext(CheckRtChangeContext *Context)
{
    // Aim the tail pointer at the list head, then clear the head through it.
    Context->LastRequest = &Context->RequestList;
    *Context->LastRequest = NULL;

    Context->Context = NULL;

    // Context->OldIrql must be initialized separately.
}

//
// Route change notification.
// NOTE(review): presumably CheckRtChangeNotifyRequests collects
// matching requests into Context and CompleteRtChangeNotifyRequests
// later completes them - confirm against the implementation.
//
extern void
CheckRtChangeNotifyRequests(
    CheckRtChangeContext *Context,
    PFILE_OBJECT FileObject,
    RouteTableEntry *RTE);

extern void
CompleteRtChangeNotifyRequests(CheckRtChangeContext *Context);

#endif  // ROUTE_INCLUDED