windows-nt/Source/XPSP1/NT/net/tcpip/driver/ipv4/iproute.c

/*++

Copyright (c) 1990-2000  Microsoft Corporation

Module Name:

       IPROUTE.C

Abstract:

  This file contains all the route table manipulation code

Author:


[Environment:]

    kernel mode only

[Notes:]

    optional-notes

Revision History:


--*/

//***   iproute.c - IP routing routines.
//
//  This file contains all the routines related to IP routing, including
//  routing table lookup and management routines.

#include "precomp.h"
#include "info.h"
#include "iproute.h"
#include "iprtdef.h"
#include "lookup.h"
#include "ipxmit.h"
#include "igmp.h"
#include "mdlpool.h"
#include "pplasl.h"
#include "tcpipbuf.h"

extern uint LoopIndex;
extern uint IPSecStatus;

// Context block for a deferred change notification. It embeds a CTEEvent,
// so it can be scheduled as a CTE work item.
// NOTE(review): presumably consumed by ChangeNotifyAsync (declared just
// below); the per-field roles here are inferred from names and types only.
typedef struct ChangeNotifyEvent {
    CTEEvent        cne_event;      // CTE event embedded in this block.
    IPNotifyOutput  cne_info;       // Notification payload.
    LIST_ENTRY      *cne_queue;     // presumably the notification queue to service - confirm.
    void            *cne_lock;      // presumably the lock guarding cne_queue - confirm.
} ChangeNotifyEvent;

void ChangeNotifyAsync(CTEEvent *Event, PVOID Context);

void InvalidateRCEChain(RouteTableEntry * RTE);

extern IPAddr g_ValidAddr;

extern uint TotalFreeInterfaces;
extern uint MaxFreeInterfaces;
extern Interface *FrontFreeList;
extern Interface *RearFreeList;


RouteCacheEntry *RCEFreeList = NULL;

extern void DampCheck(void);

#if IPMCAST

#define MCAST_STARTED   1
extern uint g_dwMcastState;

extern BOOLEAN IPMForwardAfterRcv(NetTableEntry *PrimarySrcNTE,
                                  IPHeader UNALIGNED *Header, uint HeaderLength,
                                  PVOID Data, uint BufferLength,
                                  NDIS_HANDLE LContext1, uint LContext2,
                                  uchar DestType, LinkEntry *LinkCtxt);

extern BOOLEAN IPMForwardAfterRcvPkt(NetTableEntry *PrimarySrcNTE,
                                     IPHeader UNALIGNED *Header,
                                     uint HeaderLength,
                                     PVOID Data, uint BufferLength,
                                     NDIS_HANDLE LContext1, uint LContext2,
                                     uchar DestType, uint MacHeaderSize,
                                     PNDIS_BUFFER NdisBuffer,
                                     uint* pClientCnt, LinkEntry * LinkCtxt);
#endif

ulong DbgNumPktFwd = 0;           // Debug counter (by its name, packets forwarded).

ulong UnConnected = 0;
RouteCacheEntry *UnConnectedRCE;
ulong Rcefailures = 0;

extern NetTableEntry **NewNetTableList;        // hash table for NTEs
extern uint NET_TABLE_SIZE;       // Number of buckets in NewNetTableList.
extern NetTableEntry *DHCPNTE;    // Pointer to NTE being DHCP'd.

extern NetTableEntry *LoopNTE;    // Pointer to loopback NTE.
extern Interface LoopInterface;    // The loopback interface itself (an object, not a pointer).

extern IP_STATUS SendICMPErr(IPAddr, IPHeader UNALIGNED *, uchar, uchar, ulong);
extern IP_STATUS SendICMPIPSecErr(IPAddr, IPHeader UNALIGNED *, uchar, uchar, ulong);
extern uchar ParseRcvdOptions(IPOptInfo *, OptIndex *);
extern void ULMTUNotify(IPAddr Dest, IPAddr Src, uchar Prot, void *Ptr,
                        uint NewMTU);
void EnableRouter();
void DisableRouter();

IPHeader *GetFWPacket(PNDIS_PACKET *ReturnedPacket);
void FreeFWPacket(PNDIS_PACKET Packet);
PNDIS_BUFFER GetFWBufferChain(uint DataLength, PNDIS_PACKET Packet,
                              PNDIS_BUFFER *TailPointer);
BOOLEAN InitForwardingPools();

PVOID
NTAPI
FwPacketAllocate (
    IN POOL_TYPE PoolType,
    IN SIZE_T NumberOfBytes,
    IN ULONG Tag
    );

VOID
NTAPI
FwPacketFree (
    IN PVOID Buffer
    );

extern Interface *IFList;
extern NDIS_HANDLE BufferPool;

extern CTEBlockStruc TcpipUnloadBlock;    // Structure for blocking at time of unload
extern BOOLEAN fRouteTimerStopping;
void IPDelNTE(NetTableEntry * NTE, CTELockHandle * RouteTableHandle);

CACHE_LINE_KSPIN_LOCK RouteTableLock;
LIST_ENTRY RtChangeNotifyQueue;
LIST_ENTRY RtChangeNotifyQueueEx;

extern HANDLE IpHeaderPool;

NDIS_HANDLE IpForwardPacketPool;
HANDLE IpForwardLargePool;
HANDLE IpForwardSmallPool;

// Buffer size calculation:  Based on the MDL pool's implementation:
// sizeof(POOL_HEADER) + N * ALIGN_UP(sizeof(MDL) + BufSize, PVOID) == PAGE_SIZE
// N is the number of buffers per page.
// Choose BufSize to minimize wasted space per page
//
#ifdef _WIN64
// Chosen to get 5 buffers per pool page with minimal space wasted.
#define BUFSIZE_LARGE_POOL 1576
// Chosen to get 9 buffers per pool page with no space wasted.
#define BUFSIZE_SMALL_POOL 856
#else
// Chosen to get 3 buffers per pool page with 8 bytes wasted.
#define BUFSIZE_LARGE_POOL 1320
// Chosen to get 8 buffers per pool page with no space wasted.
#define BUFSIZE_SMALL_POOL 476
#endif

// Size used for the forwarding packet pool. The replacement list is
// parenthesized so that the macro behaves as a single value inside any
// surrounding expression (e.g. x / PACKET_POOL_SIZE or x % PACKET_POOL_SIZE).
#define PACKET_POOL_SIZE (16*1024)


uchar ForwardBCast;              // Flag indicating if we should forward bcasts.
uchar ForwardPackets;            // Flag indicating whether we should forward.
uchar RouterConfigured;          // TRUE if we were initially configured as a
                                 // router.
int IPEnableRouterRefCount;      // Tracks enables/disables of
                                 // routing by various services
RouteSendQ *BCastRSQ;

uint DefGWConfigured;            // Number of default gateways configed.
uint DefGWActive;                // Number of def. gateways active.
uint DeadGWDetect;
uint PMTUDiscovery;

ProtInfo *RtPI = NULL;

// Classful network masks, 16 entries. The layout (eight class A entries,
// four class B, two class C, then one each for class D and class E) lines up
// with the 16 possible values of an address's four most significant bits.
// NOTE(review): presumably indexed by that nibble through IPNetMask(); the
// lookup itself is defined outside this file - confirm.
IPMask IPMaskTable[] =
{
 CLASSA_MASK,
 CLASSA_MASK,
 CLASSA_MASK,
 CLASSA_MASK,
 CLASSA_MASK,
 CLASSA_MASK,
 CLASSA_MASK,
 CLASSA_MASK,
 CLASSB_MASK,
 CLASSB_MASK,
 CLASSB_MASK,
 CLASSB_MASK,
 CLASSC_MASK,
 CLASSC_MASK,
 CLASSD_MASK,
 CLASSE_MASK};

extern void TransmitFWPacket(PNDIS_PACKET, uint);

// Table of candidate MTU values. Each entry is a datagram size with
// sizeof(IPHeader) subtracted; the last entry is derived from MIN_VALID_MTU.
// NOTE(review): the sizes look like the RFC 1191 plateau table used for
// path MTU discovery - confirm against the code that walks this table.
uint MTUTable[] =
{
    65535 - sizeof(IPHeader),
    32000 - sizeof(IPHeader),
    17914 - sizeof(IPHeader),
    8166 - sizeof(IPHeader),
    4352 - sizeof(IPHeader),
    2002 - sizeof(IPHeader),
    1492 - sizeof(IPHeader),
    1006 - sizeof(IPHeader),
    508 - sizeof(IPHeader),
    296 - sizeof(IPHeader),
    MIN_VALID_MTU - sizeof(IPHeader)
};

uint DisableIPSourceRouting = 1;

CTETimer IPRouteTimer;

// Pointer to callout routine for dial on demand.
IPMapRouteToInterfacePtr DODCallout;

// Packet filter control variables
IPPacketFilterPtr ForwardFilterPtr = NULL;
BOOLEAN ForwardFilterEnabled = FALSE;
uint ForwardFilterRefCount = 0;
CTEBlockStruc ForwardFilterBlock;

IPRtChangePtr pIPRtChangePtr;

RouteInterface DummyInterface;    // Dummy interface.

#if FFP_SUPPORT
ULONG FFPRegFastForwardingCacheSize;    // FFP Configuration Params
ULONG FFPRegControlFlags;   // from the System Registry

ULONG FFPFlushRequired;     // Whether an FFP Cache Flush is needed
#endif // if FFP_SUPPORT

ULONG RouteTimerTicks;      // To simulate 2 timers with different granularity

ULONG FlushIFTimerTicks;    // To simulate 2 timers with different granularity

#ifdef ALLOC_PRAGMA
//
// Make init code disposable.
//
int InitRouting(IPConfigInfo * ci);

#pragma alloc_text(INIT, InitRouting)

#endif // ALLOC_PRAGMA

// CleanupP2MP_RTE - unlink a route from the route chain of its link.
//
// Invoked whenever a route is deleted. If the RTE is attached to a link
// (rte_link is non-NULL), the link's singly linked list of routes (headed by
// link_rte and chained through rte_nextlinkrte) is searched for the RTE and
// the RTE is spliced out. If the RTE names a link but is not on that link's
// chain, the macro asserts. The RTE's own rte_link and rte_nextlinkrte
// fields are left as they were.
//
// Caution: the expansion is a bare { } block (not do { } while (0)) and
// _RTE is evaluated several times, so pass a side-effect-free expression and
// watch for a trailing ';' in front of an 'else'.
#define CleanupP2MP_RTE(_RTE) {                                     \
  if ((_RTE)->rte_link){                                            \
    LinkEntry *Link;                                                \
    RouteTableEntry *PrvRte, *tmpRte;                               \
    Link = (_RTE)->rte_link;                                        \
    PrvRte = Link->link_rte;                                        \
    tmpRte = Link->link_rte;                                        \
    while (tmpRte){                                                 \
      if (tmpRte == (_RTE)) break;                                  \
      PrvRte = tmpRte;                                              \
      tmpRte = tmpRte->rte_nextlinkrte;                             \
    }                                                               \
    if (tmpRte) {                                                   \
      if (PrvRte == tmpRte) {                                       \
        Link->link_rte = (_RTE)->rte_nextlinkrte;                   \
      } else {                                                      \
        PrvRte->rte_nextlinkrte = (_RTE)->rte_nextlinkrte;          \
      }                                                             \
    } else {                                                        \
      ASSERT((FALSE));                                              \
    }                                                               \
  }                                                                 \
}


//** GetIfConstraint - Pick the interface constraint for a route lookup
//
//  For a class D (multicast) destination the constraint comes from the
//  caller's ioi_mcastif option; if that yields nothing, a source address was
//  supplied and the packet was not reinjected by IPsec, the lookup is
//  constrained by source address alone. For any other destination the
//  constraint comes from the ioi_ucastif option.
//
// Arguments: Dest    - destination address
//            Src     - source address (0 if none)
//            OptInfo - options to use for the lookup, may be NULL
//            fIpsec  - TRUE for an IPsec reinjected packet
//
// Returns: IfIndex to constrain lookup to,
//          0 if unconstrained
//          INVALID_IF_INDEX if constrained by source address only
//
uint
GetIfConstraint(IPAddr Dest, IPAddr Src, IPOptInfo *OptInfo, BOOLEAN fIpsec)
{
    uint IfIndex;

    if (!CLASSD_ADDR(Dest)) {
        // Not multicast: only the unicast interface option applies.
        if (OptInfo == NULL) {
            return 0;
        }
        return OptInfo->ioi_ucastif;
    }

    // Multicast: start from the multicast interface option, if any.
    IfIndex = 0;
    if (OptInfo != NULL) {
        IfIndex = OptInfo->ioi_mcastif;
    }

    // No explicit interface; fall back to constraining by source address.
    if (IfIndex == 0 && Src && !fIpsec) {
        IfIndex = INVALID_IF_INDEX;
    }

    return IfIndex;
}

//** DummyFilterPtr - Placeholder packet-filter callout
//
//  Stand-in callout that is installed while a real filter callout is being
//  deregistered. It ignores the packet entirely and lets everything through.
//
//  Entry:  All arguments are ignored.
//
//  Returns: FORWARD, always.
//
FORWARD_ACTION
DummyFilterPtr(
    struct IPHeader UNALIGNED* PacketHeader,
    uchar* Packet,
    uint PacketLength,
    uint RecvInterfaceIndex,
    uint SendInterfaceIndex,
    IPAddr RecvLinkNextHop,
    IPAddr SendLinkNextHop)
{
    return FORWARD;
}

//** SetDummyFilterPtr - filter-driver callout installation routine
//
//  A type-safe routine to install a packet-filter routine (normally the
//  dummy one, DummyFilterPtr) as the packet-filter callout. Taking the
//  callout as an IPPacketFilterPtr argument lets the compiler check its
//  signature before it is stored through the untyped
//  InterlockedExchangePointer interface.
//
//  The routine previously ignored its argument and always stored
//  DummyFilterPtr; it now installs the callout it is given, as documented.
//  Callers that pass DummyFilterPtr see no change.
//
//  Entry:  FilterPtr   - the new packet-filter callout. Must not be NULL,
//                        since the callout is invoked without a NULL check.
//
//  Returns: Nothing.
//
void
SetDummyFilterPtr(IPPacketFilterPtr FilterPtr)
{
    InterlockedExchangePointer((PVOID*)&ForwardFilterPtr, FilterPtr);
}

//** DerefFilterPtr - Release a reference on the filter-driver callout
//
//  Decrements ForwardFilterRefCount. When the count reaches zero,
//  ForwardFilterBlock is signalled so that anyone blocked waiting for the
//  callout to become idle can proceed.
//
//  Entry:   Nothing.
//
//  Returns: Nothing.
//
void
DerefFilterPtr(void)
{
    if (CTEInterlockedDecrementLong(&ForwardFilterRefCount) != 0) {
        // Other references remain; nobody to wake.
        return;
    }

    CTESignal(&ForwardFilterBlock, NDIS_STATUS_SUCCESS);
}

//** NotifyFilterOfDiscard - Show a doomed packet to the filter callout
//
//  Called when a packet is about to be dropped before the normal filtering
//  step has run, so that the filter still gets to see (and possibly log) it.
//  The callout is invoked with the receiving interface's index, no send
//  interface (INVALID_IF_INDEX), IPADDR_LOCAL as the receive next hop and
//  NULL_IP_ADDR as the send next hop. A reference on the callout is held
//  across the call.
//
//  Entry:  NTE             - receiving NTE
//          IPH             - header of dropped packet
//          Data            - payload of dropped packet
//          DataSize        - length of bytes at 'Data'.
//
//  Returns: TRUE if IP filter-driver returned 'FORWARD', FALSE otherwise.
//
BOOLEAN
NotifyFilterOfDiscard(NetTableEntry* NTE, IPHeader UNALIGNED* IPH, uchar* Data,
                      uint DataSize)
{
    FORWARD_ACTION Verdict;

    // Pin the callout for the duration of the call.
    CTEInterlockedIncrementLong(&ForwardFilterRefCount);

    Verdict = (*ForwardFilterPtr)(IPH, Data, DataSize,
                                  NTE->nte_if->if_index, INVALID_IF_INDEX,
                                  IPADDR_LOCAL, NULL_IP_ADDR);

    DerefFilterPtr();

    if (Verdict != FORWARD) {
        return FALSE;
    }
    return TRUE;
}

//** DummyXmit - Dummy interface transmit handler.
//
//  A placeholder that should never be called; it asserts if it is.
//
//  Entry:  Context         - Unused.
//          PacketArray     - Array of packets to be transmitted (unused).
//          NumberOfPackets - Number of entries in PacketArray (unused).
//          Dest            - Destination address of the packets (unused).
//          RCE             - Pointer to RCE (unused).
//          LinkCtxt        - Link context (unused).
//
//  Returns: NDIS_STATUS_SUCCESS
//

NDIS_STATUS
__stdcall
DummyXmit(
    void *Context,
    PNDIS_PACKET *PacketArray,
    uint NumberOfPackets,
    IPAddr Dest,
    RouteCacheEntry * RCE,
    void *LinkCtxt)
{
    ASSERT(FALSE);

    return NDIS_STATUS_SUCCESS;
}

//* DummyXfer - Dummy interface transfer data routine.
//
//  A placeholder that should never be called; it asserts if it is. Nothing
//  is copied and BytesCopied is not written.
//
//  Entry:  Context         - Unused.
//          TDContext       - Transfer-data context (unused).
//          Dummy           - Unused.
//          Offset          - Offset in frame from which to start copying
//                            (unused).
//          BytesToCopy     - Number of bytes to copy (unused).
//          DestPacket      - Packet describing buffer to copy into (unused).
//          BytesCopied     - Place to return bytes copied (unused).
//
//  Returns: NDIS_STATUS_FAILURE
//
NDIS_STATUS
__stdcall
DummyXfer(
    void *Context,
    NDIS_HANDLE TDContext,
    uint Dummy,
    uint Offset,
    uint BytesToCopy,
    PNDIS_PACKET DestPacket,
    uint * BytesCopied)
{
    ASSERT(FALSE);
    return NDIS_STATUS_FAILURE;
}

//* DummyClose - Dummy interface close routine.
//
//  A placeholder that should never be called; it asserts if it is and
//  otherwise does nothing.
//
//  Entry:  Context     - Unused.
//
//  Returns: Nothing.
//
void
__stdcall
DummyClose(
    void *Context)
{
    ASSERT(FALSE);
}

//* DummyInvalidate - Dummy RCE invalidation routine.
//
//  A placeholder that does nothing at all. Unlike the other dummy
//  handlers in this file it does not assert when called.
//
//  Entry:  Context     - Unused.
//          RCE         - Pointer to RCE to be invalidated (unused).
//
//  Returns: Nothing.
//
void
__stdcall
DummyInvalidate(
    void *Context,
    RouteCacheEntry * RCE)
{
}

//* DummyQInfo - Dummy query information handler.
//
//  A placeholder that should never be called; it asserts if it is. No
//  data is returned and Size is not written.
//
//  Input:  IFContext   - Interface context (unused).
//          ID          - TDIObjectID for object (unused).
//          Buffer      - Buffer to put data into (unused).
//          Size        - Pointer to size of buffer (unused).
//          Context     - Pointer to context block (unused).
//
//  Returns: TDI_INVALID_REQUEST
//
int
__stdcall
DummyQInfo(
    void *IFContext,
    TDIObjectID * ID,
    PNDIS_BUFFER Buffer,
    uint * Size,
    void *Context)
{
    ASSERT(FALSE);
    return TDI_INVALID_REQUEST;
}

//* DummySetInfo - Dummy set information handler.
//
//  A placeholder that should never be called; it asserts if it is.
//
//  Input:  IFContext   - Interface context (unused).
//          ID          - TDIObjectID for object (unused).
//          Buffer      - Buffer holding the data to set (unused).
//          Size        - Size of Buffer in bytes (unused).
//
//  Returns: TDI_INVALID_REQUEST
//
int
__stdcall
DummySetInfo(
    void *IFContext,
    TDIObjectID * ID,
    void *Buffer,
    uint Size)
{
    ASSERT(FALSE);
    return TDI_INVALID_REQUEST;
}

//* DummyAddAddr - Dummy add address routine.
//
//  A placeholder that should never be called; it asserts if it is.
//
//  Input:  Context     - Unused.
//          Type        - Unused.
//          Address     - Unused.
//          Mask        - Unused.
//          Context2    - Unused.
//
//  Returns: TRUE
//
uint
__stdcall
DummyAddAddr(
    void *Context,
    uint Type,
    IPAddr Address,
    IPMask Mask,
    void *Context2)
{
    ASSERT(FALSE);
    return TRUE;
}

//* DummyDelAddr - Dummy delete address routine.
//
//  A placeholder that should never be called; it asserts if it is.
//
//  Input:  Context     - Unused.
//          Type        - Unused.
//          Address     - Unused.
//          Mask        - Unused.
//
//  Returns: TRUE
//
uint
__stdcall
DummyDelAddr(
    void *Context,
    uint Type,
    IPAddr Address,
    IPMask Mask)
{
    ASSERT(FALSE);
    return TRUE;
}

//* DummyGetEList - Dummy get entity list routine.
//
//  A placeholder that should never be called; it asserts if it is. The
//  entity list and the count are left untouched.
//
//  Input:  Context     - Unused.
//          EntityList  - Pointer to entity list to be filled in (unused).
//          Count       - Pointer to number of entries in the list (unused).
//
//  Returns: FALSE
//
int
__stdcall
DummyGetEList(
    void *Context,
    TDIEntityID * EntityList,
    uint * Count)
{
    ASSERT(FALSE);
    return FALSE;
}

//* DummyDoNdisReq - Dummy send NDIS request routine.
//
//  A placeholder that should never be called; it asserts if it is. No
//  request is issued and Needed is not written.
//
//  Input:  Context     - Interface context (unused).
//          RT          - NDIS Request Type (unused).
//          OID         - NDIS Request OID (unused).
//          Info        - Information Buffer (unused).
//          Length      - Size of the information buffer (unused).
//          Needed      - Pointer to required size (unused).
//          Blocking    - Call is Sync or Async (unused).
//
//  Returns: FALSE
//
uint
__stdcall
DummyDoNdisReq(
    void *Context,
    NDIS_REQUEST_TYPE RT,
    NDIS_OID OID,
    void *Info,
    uint Length,
    uint * Needed,
    BOOLEAN Blocking)
{
    ASSERT(FALSE);
    return FALSE;
}

#if FFP_SUPPORT

// Max number of FFP enabled NIC drivers in the system at any time
// Note that this serves to limit total cache memory for FFP support
//
#define    MAXFFPDRVS     8

//* IPGetFFPDriverList - Lists unique FFP enabled drivers in the system
//
//  Called by functions that dispatch requests to FFP enabled drivers.
//  Walks IFList with RouteTableLock held and records one interface for each
//  distinct if_ffpdriver value, skipping interfaces whose if_ffpversion is 0.
//
//  The number of entries written is capped at MAXFFPDRVS. Previously the
//  cap was enforced by an ASSERT only, so a build without assertions could
//  write past the end of the caller's array; any drivers beyond the cap are
//  now left out of the list instead.
//
//  Input:  arrIF       - Array of IFs to reach all FFP enabled drivers;
//                        must have room for MAXFFPDRVS entries.
//
//  Returns: Number of FFP enabled drivers stored in arrIF
//           (at most MAXFFPDRVS).
//
uint
IPGetFFPDriverList(Interface ** arrIF)
{
    ULONG numIF;
    Interface *IF;
    UINT i;

    CTELockHandle Handle;

    // Take a lock to protect the list of all interfaces
    CTEGetLock(&RouteTableLock.Lock, &Handle);

    numIF = 0;

    // Go over the interface list to pick FFP drivers
    for (IF = IFList; IF != NULL; IF = IF->if_next) {
        // Does this interface's driver support FFP ?
        if (IF->if_ffpversion == 0)
            continue;

        // FFP supported; was driver already picked ?
        for (i = 0; i < numIF; i++) {
            if (IF->if_ffpdriver == arrIF[i]->if_ffpdriver)
                break;
        }

        if (i != numIF) {
            // This driver is already represented in the list.
            continue;
        }

        // A new driver. Never store past the end of the caller's array,
        // even when assertions are compiled out.
        ASSERT(numIF < MAXFFPDRVS);
        if (numIF >= MAXFFPDRVS) {
            break;
        }

        arrIF[numIF++] = IF;
    }

    // Release lock to protect the list of all interfaces
    CTEFreeLock(&RouteTableLock.Lock, Handle);

    return numIF;
}

//* IPReclaimRequestMem - Post processing upon request completion
//
//  Completion callback for the asynchronous requests IP hands to ARP.
//  Drops one reference on the request block and frees the block once the
//  last reference is gone.
//
//  Input:    pRequestInfo    - Points to the ReqInfoBlock of the request
//
//  Returns:    None
//
void
IPReclaimRequestMem(PVOID pRequestInfo)
{
    ReqInfoBlock *Request = (ReqInfoBlock *) pRequestInfo;

    if (InterlockedDecrement(&Request->RequestRefs) != 0) {
        // Still referenced by other outstanding requests.
        return;
    }

    // Last reference gone; reclaim the memory.
    CTEFreeMem(pRequestInfo);
}

//* IPFlushFFPCaches - Flush all FFP Caches
//
//  Call ARP to flush FFP caches in layer 2
//
//  Input:    None
//
//  Returns    None
//
void
IPFlushFFPCaches(void)
{
    Interface *arrIF[MAXFFPDRVS];
    ULONG numIF;
    CTELockHandle lhandle;
    ReqInfoBlock *pRequestInfo;
    FFPFlushParams *pFlushInfo;
    UINT i, j;

    // Check if any requests need to be posted at all
    if (numIF = IPGetFFPDriverList(arrIF)) {
        // Allocate the request block - For General and Request Specific Parts
        pRequestInfo = CTEAllocMemNBoot(sizeof(ReqInfoBlock) + sizeof(FFPFlushParams), '7iCT');
        // TCPTRACE(("IPFlushFFPCaches: Allocated mem at pReqInfo = %08X\n",
        //                pRequestInfo));

        if (pRequestInfo == NULL) {
            return;
        }
        // Prepare the params for the request [ Part common to all requests ]
        pRequestInfo->RequestType = OID_FFP_FLUSH;
        pRequestInfo->ReqCompleteCallback = IPReclaimRequestMem;

        // Prepare the params for the request [ Part specific to this request ]
        pRequestInfo->RequestLength = sizeof(FFPFlushParams);

        pFlushInfo = (FFPFlushParams *) pRequestInfo->RequestInfo;

        pFlushInfo->NdisProtocolType = NDIS_PROTOCOL_ID_TCP_IP;

        // Assign Initial Ref Count to total num of requests
        pRequestInfo->RequestRefs = numIF;

        // CTEGetLock(&FFPIFsLock, &lhandle);

        for (i = 0; i < numIF; i++) {
            // Dispatch the request block to the ARP layer
            ASSERT(arrIF[i]->if_dondisreq != NULL);
            arrIF[i]->if_dondisreq(arrIF[i]->if_lcontext,
                                   NdisRequestSetInformation,
                                   OID_FFP_FLUSH,
                                   pRequestInfo->RequestInfo,
                                   pRequestInfo->RequestLength,
                                   NULL, FALSE);
        }

        // CTEFreeLock(&FFPIFsLock, lhandle);
    }
}

//* IPSetInFFPCaches - Set an entry in all FFP Caches
//
//  Call ARP to set -ve FFP entries in caches, (or)
//  Invalidate existing +ve or -ve FFP cache entries.
//
//  Builds a single OID_FFP_DATA request block carrying the IP header plus
//  the first ULONG that follows it, and hands it, through each interface's
//  if_dondisreq handler, to every FFP enabled driver returned by
//  IPGetFFPDriverList. The requests are issued non-blocking. Nothing is
//  sent if there are no FFP drivers, if fewer than sizeof(ULONG) bytes
//  follow the header, or if the block cannot be allocated.
//
//  Input:    PacketHeader - Header of the IP Packet
//            Packet - Rest of the IP Packet
//            PacketLength - Length of "Packet" param
//            CacheEntryType - DISCARD (-ve) or INVALID
//
//  Returns    None
//
void
IPSetInFFPCaches(struct IPHeader UNALIGNED * PacketHeader, uchar * Packet,
                 uint PacketLength, ulong CacheEntryType)
{
    Interface *Drivers[MAXFFPDRVS];
    ULONG DriverCount;
    ReqInfoBlock *Request;
    FFPDataParams *DataParams;
    UINT Index;

    // Check if any requests need to be posted at all
    DriverCount = IPGetFFPDriverList(Drivers);
    if (DriverCount == 0) {
        return;
    }

    // The request carries the ULONG after the header, so it must be there
    if (PacketLength < sizeof(ULONG)) {
        return;
    }

    // One block holds both the general and the request specific parts
    Request = CTEAllocMemNBoot(sizeof(ReqInfoBlock) + sizeof(FFPDataParams), '8iCT');
    if (Request == NULL) {
        return;
    }

    // Part common to all requests
    Request->RequestType = OID_FFP_DATA;
    Request->ReqCompleteCallback = IPReclaimRequestMem;

    // Part specific to this request
    Request->RequestLength = sizeof(FFPDataParams);

    DataParams = (FFPDataParams *) Request->RequestInfo;
    DataParams->NdisProtocolType = NDIS_PROTOCOL_ID_TCP_IP;
    DataParams->CacheEntryType = CacheEntryType;

    DataParams->HeaderSize = sizeof(IPHeader) + sizeof(ULONG);
    RtlCopyMemory(&DataParams->Header, PacketHeader, sizeof(IPHeader));
    DataParams->IpHeader.DwordAfterHeader = *(ULONG *) Packet;

    // One reference per request that is about to be dispatched
    Request->RequestRefs = DriverCount;

    for (Index = 0; Index < DriverCount; Index++) {
        Interface *Driver = Drivers[Index];

        // Dispatch the request block to the ARP layer
        ASSERT(Driver->if_dondisreq != NULL);
        Driver->if_dondisreq(Driver->if_lcontext,
                             NdisRequestSetInformation,
                             OID_FFP_DATA,
                             Request->RequestInfo,
                             Request->RequestLength,
                             NULL, FALSE);
    }
}

//* IPStatsFromFFPCaches - Sum Stats from all FFP Caches
//
//  Zeroes the caller's buffer, then issues a blocking OID_FFP_DRIVER_STATS
//  query, through each interface's if_dondisreq handler, to every FFP
//  enabled driver returned by IPGetFFPDriverList. The counters of each query
//  that returns NDIS_STATUS_SUCCESS are added to the caller's buffer;
//  failed queries are skipped.
//
//  Input:    pCumulStats - Buffer that is filled with the summed statistics
//
//  Returns    None
//
void
IPStatsFromFFPCaches(FFPDriverStats * pCumulStats)
{
    Interface *Drivers[MAXFFPDRVS];
    ULONG DriverCount;
    UINT Index;
    FFPDriverStats OneDriver = { NDIS_PROTOCOL_ID_TCP_IP, 0, 0, 0, 0, 0, 0 };

    RtlZeroMemory(pCumulStats, sizeof(FFPDriverStats));

    DriverCount = IPGetFFPDriverList(Drivers);

    for (Index = 0; Index < DriverCount; Index++) {
        Interface *Driver = Drivers[Index];

        // Dispatch the request block to the ARP layer
        ASSERT(Driver->if_dondisreq != NULL);
        if (Driver->if_dondisreq(Driver->if_lcontext,
                                 NdisRequestQueryInformation,
                                 OID_FFP_DRIVER_STATS,
                                 &OneDriver,
                                 sizeof(FFPDriverStats),
                                 NULL, TRUE) != NDIS_STATUS_SUCCESS) {
            // This driver could not report; leave it out of the totals.
            continue;
        }

        // Consolidate results from all drivers
        pCumulStats->PacketsForwarded += OneDriver.PacketsForwarded;
        pCumulStats->OctetsForwarded += OneDriver.OctetsForwarded;

        pCumulStats->PacketsDiscarded += OneDriver.PacketsDiscarded;
        pCumulStats->OctetsDiscarded += OneDriver.OctetsDiscarded;

        pCumulStats->PacketsIndicated += OneDriver.PacketsIndicated;
        pCumulStats->OctetsIndicated += OneDriver.OctetsIndicated;
    }
}

#endif // if FFP_SUPPORT

//* DerefIF - Dereference an interface.
//
//  Drops one reference on an interface. If that was the last reference,
//  whoever is blocked on the interface's if_block is signalled.
//
//  Input: IF    - Interface to be dereferenced.
//
//  Returns: Nothing.
//
#pragma optimize("", off)
void
DerefIF(Interface * IF)
{
    uint PriorCount;

    PriorCount = DEREFERENCE_IF(IF);

    if (PriorCount == 1) {
        // That was the last reference. Wake whoever is blocked on it.
        ASSERT(IF->if_block != NULL);
        CTESignal(IF->if_block, NDIS_STATUS_SUCCESS);
    }
}

//* LockedDerefIF - Dereference an interface w/RouteTableLock held.
//
// Same job as DerefIF, for callers that already hold the RouteTableLock:
// drops one reference on the interface and, if the count is now zero,
// signals whoever is blocked on the interface's if_block.
//
// Input: IF                          - Interface to be dereferenced.
//
// Returns: Nothing.
//
void
LockedDerefIF(Interface * IF)
{
    LOCKED_DEREFERENCE_IF(IF);

    if (IF->if_refcount == 0) {
        // That was the last reference. Wake whoever is blocked on it.
        ASSERT(IF->if_block != NULL);
        CTESignal(IF->if_block, NDIS_STATUS_SUCCESS);
    }
}
#pragma optimize("", on)

//* DerefLink - Dereference the Link
//
//  Drops one reference on a link. If that was the last reference, the
//  owning interface's if_closelink handler is called to tell the lower
//  layer the link is going away, and the link's memory is freed.
//
//  On DBG builds the routine first breaks into the debugger if the link is
//  still present on its interface's if_link list, since that means it is
//  being freed without having been unlinked.
//
//  Input:  Link    - Link to be dereferenced.
//
//  Returns: Nothing.
//
void
DerefLink(LinkEntry * Link)
{
    uint PriorCount;
    Interface *OwnerIF;

    PriorCount = CTEInterlockedExchangeAdd(&Link->link_refcount, -1);
    if (PriorCount != 1) {
        // Other references remain.
        return;
    }

    // That was the last reference.
    OwnerIF = Link->link_if;

    ASSERT(OwnerIF);
    ASSERT(OwnerIF->if_closelink);

#if DBG
    // P2MP stuff still needs to be cooked
    {
        LinkEntry *Scan;

        for (Scan = OwnerIF->if_link; Scan != NULL; Scan = Scan->link_next) {
            if (Scan == Link) {
                // freeing the Link without cleaning up??
                DbgBreakPoint();
            }
        }
    }
#endif

    // Notify the lower layer that the link is going down
    (*(OwnerIF->if_closelink)) (OwnerIF->if_lcontext, Link->link_arpctxt);

    // Free the link
    CTEFreeMem(Link);
}

//** AddrOnIF - Check to see if a given address is local to an IF
//
//  Walks the interface's chain of NTEs (if_nte, linked through nte_ifnext)
//  looking for a valid NTE whose address equals Addr. The caller is assumed
//  to hold the RouteTableLock.
//
//  Input:  IF          - Interface to check.
//          Addr        - Address to check.
//
//  Returns: TRUE if Addr is an address for IF, FALSE otherwise.
//
uint
AddrOnIF(Interface * IF, IPAddr Addr)
{
    NetTableEntry *Entry;

    for (Entry = IF->if_nte; Entry != NULL; Entry = Entry->nte_ifnext) {
        if (!(Entry->nte_flags & NTE_VALID)) {
            continue;
        }
        if (IP_ADDR_EQUAL(Entry->nte_addr, Addr)) {
            return TRUE;
        }
    }

    return FALSE;
}

//** BestNTEForIF - Find the 'best match' NTE on a given interface.
//
//  Utility routine that is mainly useful when an interface carries several
//  IP addresses. The interface's valid NTEs are scanned in order:
//    - the first one whose subnet (address & mask) contains Address wins;
//    - failing that, the first valid NTE on the interface is used.
//  If the interface has no valid NTE and Address is neither IP_LOCAL_BCST
//  nor IP_ZERO_BCST, NULL is returned. Otherwise (an interface without NTEs,
//  or a broadcast address with no valid NTE on the interface) the global
//  NTE hash table is searched for the first valid NTE that is not the
//  loopback NTE, is not a null address on an IF_FLAGS_NOIPADDR interface,
//  and is not on an IF_FLAGS_UNI interface.
//
//  Input:  Address     - Address to match against the NTEs.
//          IF          - Pointer to IF to be searched.
//
//  Returns: The 'best match' NTE, or NULL if none could be found.
//
NetTableEntry *
BestNTEForIF(IPAddr Address, Interface * IF)
{
    NetTableEntry *Candidate;
    NetTableEntry *Fallback = NULL;
    uint Bucket;

    if (IF->if_nte != NULL) {
        // Walk the interface's own NTEs, looking for a valid one.
        for (Candidate = IF->if_nte; Candidate != NULL;
             Candidate = Candidate->nte_ifnext) {

            if (!(Candidate->nte_flags & NTE_VALID)) {
                continue;
            }

            if (IP_ADDR_EQUAL(Address & Candidate->nte_mask,
                              Candidate->nte_addr & Candidate->nte_mask)) {
                // Same subnet: exact winner.
                return Candidate;
            }

            if (Fallback == NULL) {
                // Remember the first valid NTE in case nothing matches.
                Fallback = Candidate;
            }
        }

        if (Fallback != NULL) {
            return Fallback;
        }

        // Nothing valid on this interface. Only broadcasts get the special
        // case treatment below, since the interface doesn't really matter
        // for them.
        if (!IP_ADDR_EQUAL(Address, IP_LOCAL_BCST) &&
            !IP_ADDR_EQUAL(Address, IP_ZERO_BCST)) {
            return NULL;
        }
    }

    // An 'anonymous' I/F, or the address we're reaching is a broadcast and
    // the interface has no valid address. Find a valid (non-loopback,
    // non-null ip, non-uni) address anywhere in the system.
    for (Bucket = 0; Bucket < NET_TABLE_SIZE; Bucket++) {
        for (Candidate = NewNetTableList[Bucket]; Candidate != NULL;
             Candidate = Candidate->nte_next) {

            if (Candidate == LoopNTE) {
                continue;
            }
            if (!(Candidate->nte_flags & NTE_VALID)) {
                continue;
            }
            if ((Candidate->nte_if->if_flags & IF_FLAGS_NOIPADDR) &&
                IP_ADDR_EQUAL(Candidate->nte_addr, NULL_IP_ADDR)) {
                continue;
            }
            if (Candidate->nte_if->if_flags & IF_FLAGS_UNI) {
                continue;
            }

            return Candidate;
        }
    }

    return NULL;
}

//** IsBCastOnNTE - Determine if the specified addr. is a bcast on a spec. NTE.
//
//  Classifies Address relative to one NTE. For a valid NTE the checks run
//  in this order:
//    1. subnet broadcast - the NTE's address under its own mask, with the
//       host bits taken from the interface broadcast address (skipped when
//       the mask is all ones)                            -> DEST_SN_BCAST
//    2. any class D address                              -> DEST_MCAST
//    3. the same comparison as (1), but using the mask that
//       IPNetMask(Address) yields                        -> DEST_BCAST
//    4. the interface broadcast address itself           -> DEST_BCAST
//  For an NTE that is not valid, only the NTE currently being DHCP'd is
//  considered, and only the interface broadcast address matches
//  (DEST_BCAST). No multicast group membership is consulted here.
//
//  Entry:  Address     - Address in question.
//          NTE         - NetTableEntry to check Address against.
//
//  Returns: Type of broadcast, or DEST_LOCAL if it's not a broadcast.
//
uchar
IsBCastOnNTE(IPAddr Address, NetTableEntry * NTE)
{
    IPAddr IfBCast;
    IPMask SubnetMask;
    IPMask ClassMask;

    if (!(NTE->nte_flags & NTE_VALID)) {
        // An NTE without a valid address: only the one being DHCP'd can
        // receive, and then only the interface broadcast.
        if (NTE == DHCPNTE &&
            IP_ADDR_EQUAL(Address, NTE->nte_if->if_bcast)) {
            return DEST_BCAST;
        }
        return DEST_LOCAL;
    }

    IfBCast = NTE->nte_if->if_bcast;
    SubnetMask = NTE->nte_mask;

    // Subnet broadcast, unless the mask leaves no host bits at all.
    if (SubnetMask != 0xFFFFFFFF &&
        IP_ADDR_EQUAL(Address,
                      (NTE->nte_addr & SubnetMask) | (IfBCast & ~SubnetMask))) {
        return DEST_SN_BCAST;
    }

    // Class D addresses are reported as multicast without further checks.
    if (CLASSD_ADDR(Address)) {
        return DEST_MCAST;
    }

    // See if it's an all subnet's broadcast.
    ClassMask = IPNetMask(Address);
    if (IP_ADDR_EQUAL(Address,
                      (NTE->nte_addr & ClassMask) | (IfBCast & ~ClassMask))) {
        return DEST_BCAST;
    }

    // A global bcast is certainly a bcast on this net.
    if (IP_ADDR_EQUAL(Address, IfBCast)) {
        return DEST_BCAST;
    }

    return DEST_LOCAL;
}

//** InvalidSourceAddress - Check to see if a source address is invalid.
//
//  Decides whether Address is unacceptable as the source address of an
//  incoming packet. It is rejected when it is Class D or Class E, equal to
//  IP_ZERO_BCST or IP_LOCAL_BCST, has an all-zeros or all-ones part under
//  its class mask, or equals the subnet number or subnet broadcast of any
//  valid, non-loopback local NTE whose mask is not a host mask.
//
//  Input: Address      - Address to be checked.
//
//  Returns: TRUE if the address is invalid, FALSE if it is acceptable.
//
uint
InvalidSourceAddress(IPAddr Address)
{
    NetTableEntry *Entry;           // NTE currently being examined.
    IPMask ClassMask;               // Class-based mask of Address.
    IPMask SubnetMask;              // Mask of the NTE being examined.
    IPAddr NetPart;                 // Address masked with ClassMask.
    IPAddr SubnetPart;              // NTE address masked with SubnetMask.
    IPAddr LocalAddr;               // Address of the NTE being examined.
    uint Bucket;

    // The obvious cases first.
    if (CLASSD_ADDR(Address) ||
        CLASSE_ADDR(Address) ||
        IP_ADDR_EQUAL(Address, IP_ZERO_BCST) ||
        IP_ADDR_EQUAL(Address, IP_LOCAL_BCST)) {
        return TRUE;
    }

    // Reject a zero host/subnet part and an all-subnets broadcast.
    ClassMask = IPNetMask(Address);
    NetPart = Address & ClassMask;

    if (IP_ADDR_EQUAL(Address, NetPart) ||
        IP_ADDR_EQUAL(Address, (NetPart | ~ClassMask))) {
        return TRUE;
    }

    // Walk every local address and reject the subnet number and the subnet
    // broadcast of each subnet we are attached to.
    for (Bucket = 0; Bucket < NET_TABLE_SIZE; Bucket++) {
        for (Entry = NewNetTableList[Bucket];
             Entry != NULL; Entry = Entry->nte_next) {

            LocalAddr = Entry->nte_addr;

            if (!(Entry->nte_flags & NTE_VALID) || IP_LOOPBACK(LocalAddr)) {
                continue;
            }

            SubnetMask = Entry->nte_mask;
            if (IP_ADDR_EQUAL(SubnetMask, HOST_MASK)) {
                continue;
            }

            SubnetPart = LocalAddr & SubnetMask;

            if (IP_ADDR_EQUAL(Address, SubnetPart) ||
                IP_ADDR_EQUAL(Address,
                              (SubnetPart |
                               (Entry->nte_if->if_bcast & ~SubnetMask)))) {
                return TRUE;
            }
        }
    }

    return FALSE;
}

// 8 regions of 31 cache elements.
// Each region is indexed by the 3 most significant bits of the IP address.
// Each cache element within a region is indexed by a hash of the IP address.
// Each cache element is composed of 29 least significant bits of the IP
// address plus the three bit address type code.
// (31 is prime and works well with our hash.)
//
#define ATC_BITS                3
#define ATC_ELEMENTS_PER_REGION 31

#define ATC_REGIONS             (1 << ATC_BITS)
#define ATC_CODE_MASK           (ULONG32)(ATC_REGIONS - 1)
#define ATC_ADDR_MASK           (ULONG32)(~ATC_CODE_MASK)

// sanity check for 3 bits of address type code
C_ASSERT(ATC_REGIONS == 8);
C_ASSERT(ATC_CODE_MASK == 0x00000007);
C_ASSERT(ATC_ADDR_MASK == 0xFFFFFFF8);

// Each cache element is 32 bits to support atomic reading and writing.
//
ULONG32 AddrTypeCache [ATC_REGIONS * ATC_ELEMENTS_PER_REGION];

#if DBG
ULONG DbgAddrTypeCacheHits;
ULONG DbgAddrTypeCacheMisses;
ULONG DbgAddrTypeCacheCollisions;
ULONG DbgAddrTypeCacheFlushes;
ULONG DbgAddrTypeCacheNoUpdates;
ULONG DbgAddrTypeCacheLastNoUpdateDestType;
#endif

// The following type codes must fit within ATC_BITS of information.
//
typedef enum _ADDRESS_TYPE_CODE {
    ATC_LOCAL = 0,
    ATC_BCAST,
    ATC_MCAST,
    ATC_REMOTE,
    ATC_REMOTE_BCAST,
    ATC_REMOTE_MCAST,
    ATC_SUBNET_BCAST,
    ATC_NUM_CODES
} ADDRESS_TYPE_CODE;

// The following array is indexed by ADDRESS_TYPE_CODE values.
//
const char MapAddrTypeCodeToDestType [] = {
    DEST_LOCAL,
    DEST_BCAST,
    DEST_MCAST,
    DEST_REMOTE,
    DEST_REM_BCAST,
    DEST_REM_MCAST,
    DEST_SN_BCAST,
};

//** ComputeAddrTypeCacheIndex - Map an IP address to its slot in the
//      address type cache.
//
//  The top ATC_BITS bits of the address pick the region; a multiplicative
//  hash of the whole address, reduced modulo ATC_ELEMENTS_PER_REGION, picks
//  the element inside that region.
//
//  Input:  Address - IP Address to compute the index of.
//
//  Returns: Valid index into the address type cache.
//
__forceinline
ULONG
ComputeAddrTypeCacheIndex(IPAddr Address)
{
    // Region the address belongs to (its most significant ATC_BITS bits).
    ULONG RegionNumber = Address >> (32 - ATC_BITS);

    // Element within the region, obtained by hashing the address.
    ULONG Slot = (1103515245 * Address + 12345) % ATC_ELEMENTS_PER_REGION;

    // Flat index into AddrTypeCache.
    ULONG FlatIndex = (RegionNumber * ATC_ELEMENTS_PER_REGION) + Slot;

    ASSERT(RegionNumber < ATC_REGIONS);
    ASSERT(FlatIndex < (sizeof(AddrTypeCache) / sizeof(AddrTypeCache[0])));

    return FlatIndex;
}

//** AddrTypeCacheFlush - Empty the cache slot an address maps to.
//
//  The slot is cleared unconditionally; whatever entry currently occupies
//  it is discarded, whether or not it was cached for this Address.
//
//  Input: Address - Address to remove from the cache.
//
//  Returns: nothing.
//
void
AddrTypeCacheFlush(IPAddr Address)
{
    // Zero is the "empty" value that AddrTypeCacheLookup treats as a miss.
    AddrTypeCache [ComputeAddrTypeCacheIndex(Address)] = 0;

#if DBG
    DbgAddrTypeCacheFlushes++;
#endif
}

//** AddrTypeCacheLookup - Look an address up in the address type cache.
//
//  Input:  Address     - Address to look up.
//  Output: CacheIndex  - Receives the cache index corresponding to Address.
//                        Always written, hit or miss.
//          DestType    - Receives the destination type if the address is
//                        found in the cache.
//
//  Returns: TRUE if the address was found in the cache.
//
//  N.B. The output parameter DestType is only initialized if TRUE is returned.
//
__forceinline
BOOLEAN
AddrTypeCacheLookup(IPAddr Address, ULONG *CacheIndex, uchar *DestType)
{
    ULONG Slot;
    ULONG32 Entry;
    ADDRESS_TYPE_CODE Code;

    // Hand the slot back to the caller and take a single snapshot of it.
    //
    Slot = ComputeAddrTypeCacheIndex(Address);
    *CacheIndex = Slot;
    Entry = AddrTypeCache [Slot];

    // An empty slot, or one whose address bits belong to a different
    // address, is a miss.
    //
    if ((Entry == 0) ||
        (((Address << ATC_BITS) ^ Entry) & ATC_ADDR_MASK) != 0) {
#if DBG
        DbgAddrTypeCacheMisses++;
#endif
        return FALSE;
    }

    // Hit: the low bits carry the type code; translate it to the
    // destination type.
    //
    Code = Entry & ATC_CODE_MASK;

    ASSERT(Code < ATC_NUM_CODES);
    *DestType = MapAddrTypeCodeToDestType[Code];

#if DBG
    DbgAddrTypeCacheHits++;
#endif

    return TRUE;
}

//** AddrTypeCacheUpdate - Store the destination type for an address in the
//      address type cache.
//
//  Destination types that have no address type code are not cached; in
//  debug builds they are counted and the last such type is remembered.
//
//  Input:  Address     - Address to add or update.
//          CacheIndex  - Cache index corresponding to the Address.
//          DestType    - Destination type to cache for the Address.
//
//  Returns: nothing.
//
__forceinline
void
AddrTypeCacheUpdate(IPAddr Address, ULONG CacheIndex, uchar DestType)
{
    ADDRESS_TYPE_CODE Code;

    ASSERT(CacheIndex < (sizeof(AddrTypeCache) / sizeof(AddrTypeCache[0])));

    // Translate the destination type into its compact cache code.
    switch (DestType) {
    case DEST_LOCAL:
        Code = ATC_LOCAL;
        break;
    case DEST_BCAST:
        Code = ATC_BCAST;
        break;
    case DEST_MCAST:
        Code = ATC_MCAST;
        break;
    case DEST_REMOTE:
        Code = ATC_REMOTE;
        break;
    case DEST_REM_BCAST:
        Code = ATC_REMOTE_BCAST;
        break;
    case DEST_REM_MCAST:
        Code = ATC_REMOTE_MCAST;
        break;
    case DEST_SN_BCAST:
        Code = ATC_SUBNET_BCAST;
        break;
    default:
        // No code for this type: leave the cache untouched.
#if DBG
        DbgAddrTypeCacheNoUpdates++;
        DbgAddrTypeCacheLastNoUpdateDestType = DestType;
#endif
        return;
    }

#if DBG
    // Overwriting a slot that is already in use counts as a collision.
    if (AddrTypeCache [CacheIndex] != 0) {
        DbgAddrTypeCacheCollisions++;
    }
#endif

    // Address bits go in the upper part of the element, the code in the
    // low ATC_BITS bits.
    AddrTypeCache [CacheIndex] = (Address << ATC_BITS) | Code;
}

//** GetAddrType - Return the destination type of a specified address.
//
//  Input: Address - Address to get the destination type of.
//
//  Returns: Destination type.
//
uchar
GetAddrType(IPAddr Address)
{
    ULONG CacheIndex;
    NetTableEntry *NTE;             // Pointer to current NTE.
    IPMask Mask;                    // Mask for address.
    IPMask SNMask;
    uint i;
    uchar Result;                   // Result of broadcast check.

    // Check the cache and return if we got a hit.
    //
    if (AddrTypeCacheLookup(Address, &CacheIndex, &Result)) {
        return Result;
    }

    // We don't cache, nor do we need to cache, these types of invalid
    // addresses.
    //
    if (CLASSE_ADDR(Address)) {
        return DEST_INVALID;
    }

    // See if it's one of our local addresses, or a broadcast
    // on a local address.
    // optimize it for the DEST_LOCAL case
    //
    for (NTE = NewNetTableList[NET_TABLE_HASH(Address)];
         NTE; NTE = NTE->nte_next) {

        if (IP_ADDR_EQUAL(NTE->nte_addr, Address) &&
            (NTE->nte_flags & NTE_VALID) &&
            !((IP_ADDR_EQUAL(Address, NULL_IP_ADDR) && (NTE->nte_if->if_flags & IF_FLAGS_NOIPADDR)))) {
            Result = DEST_LOCAL;
            goto gat_exit;
        }
    }

    // go thru the whole table for other cases
    //
    for (i = 0; i < NET_TABLE_SIZE; i++) {
        for (NTE = NewNetTableList[i]; NTE; NTE = NTE->nte_next) {

            if (!(NTE->nte_flags & NTE_VALID)) {
                continue;
            }

            if ((Result = IsBCastOnNTE(Address, NTE)) != DEST_LOCAL) {
                goto gat_exit;
            }

            // See if the destination has a valid host part.
            SNMask = NTE->nte_mask;
            if (IP_ADDR_EQUAL(Address & SNMask, NTE->nte_addr & SNMask)) {
                // On this subnet. See if the host part is invalid.

                if (IP_ADDR_EQUAL(Address & SNMask, Address)) {
                    Result = DEST_INVALID;    // Invalid 0 host part.
                    goto gat_exit;
                }
            }
        }
    }

    // It's not a local address, see if it's loopback.
    if (IP_LOOPBACK(Address)) {
        Result = DEST_LOCAL;
        goto gat_exit;
    }

    // If we're doing IGMP, see if it's a Class D address. If it is,
    // return that.
    if (CLASSD_ADDR(Address)) {
        if (IGMPLevel != 0) {
            Result = DEST_REM_MCAST;
            goto gat_exit;
        } else {
            Result = DEST_INVALID;
            goto gat_exit;
        }
    }
    Mask = IPNetMask(Address);

    // Now check remote broadcast. When we get here we know that the
    // address is not a global broadcast, a subnet broadcast for a subnet
    // of which we're a member, or an all-subnets broadcast for a net of
    // which we're a member. Since we're avoiding making assumptions about
    // all subnet of a net having the same mask, we can't really check for
    // a remote subnet broadcast. We'll use the net mask and see if it's
    // a remote all-subnet's broadcast.
    if (IP_ADDR_EQUAL(Address, (Address & Mask) | (IP_LOCAL_BCST & ~Mask))) {
        Result = DEST_REM_BCAST;
        goto gat_exit;
    }

    // Check for invalid 0 parts. All we can do from here is see if he's
    // sending to a remote net with all zero subnet and host parts. We
    // can't check to see if he's sending to a remote subnet with an all
    // zero host part.
    if (IP_ADDR_EQUAL(Address, NULL_IP_ADDR)) {
        Result = DEST_INVALID;
        goto gat_exit;
    }

#if DBG
    if (IP_ADDR_EQUAL(Address, Address & Mask)) {
        //This is a remote address with null host part per classfull address
        //But may be a supernetted address, where the prefix len is less than the
        //class mask prefix len for the metid.
        //We should let this address go out.
        KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL," GAT: zero host part %x?\n", Address));
    }
#endif
    // Must be remote.
    Result = DEST_REMOTE;

gat_exit:

    AddrTypeCacheUpdate(Address, CacheIndex, Result);

    return Result;
}

//** GetLocalNTE - Get the local NTE for an incoming packet.
//
//  Called during receive processing to find a matching NTE for a packet.
//  The NTE the packet arrived on is checked first, then every other NTE.
//
//  Input:  Address     - The dest. address of the packet.
//          NTE         - Pointer to NTE packet was received on. On exit it
//                        is replaced when a different NTE matched (a local
//                        address, a broadcast on another NTE of the same
//                        interface, or the loopback NTE); otherwise it is
//                        left unchanged.
//
//  Returns: DEST_LOCAL if the packet is destined for this host,
//           DEST_REMOTE if it needs to be routed,
//           DEST_SN_BCAST, DEST_BCAST or DEST_MCAST if it's a broadcast or
//           multicast on the receiving interface,
//           DEST_REM_MCAST or DEST_REM_BCAST for a remote multicast or
//           net broadcast,
//           DEST_INVALID if the address is not acceptable.
//
uchar
GetLocalNTE(IPAddr Address, NetTableEntry ** NTE)
{
    NetTableEntry *ArrivalNTE = *NTE;   // NTE the packet came in on.
    NetTableEntry *Entry;               // NTE currently being examined.
    Interface *ArrivalIF;               // Interface the packet came in on.
    IPMask ClassMask;
    uchar Type;
    uint Bucket;

    // Quick check to see if it is for the NTE it came in on (the common case).
    if (IP_ADDR_EQUAL(Address, ArrivalNTE->nte_addr) &&
        (ArrivalNTE->nte_flags & NTE_VALID)) {
        return DEST_LOCAL;        // For us, just return.
    }

    // Is it a broadcast of some sort on the NTE it came in on?
    Type = IsBCastOnNTE(Address, ArrivalNTE);
    if (Type != DEST_LOCAL) {
        return Type;
    }

    // Is this a mcast on the loopback NTE?
    if ((ArrivalNTE == LoopNTE) && CLASSD_ADDR(Address)) {
        return DEST_MCAST;
    }

    // The common cases failed us. Look at the other NTEs, including the
    // loopback NTE, because a loopback frame could come through here and
    // frames from ourselves to ourselves arrive on the loopback NTE. The
    // NTE we've already examined is skipped.

    ArrivalIF = ArrivalNTE->nte_if;

    // Optimize for the DEST_LOCAL case: search only the hash chain this
    // address falls on for an exact local address match.
    for (Entry = NewNetTableList[NET_TABLE_HASH(Address)];
         Entry != NULL; Entry = Entry->nte_next) {

        if (Entry == ArrivalNTE) {
            continue;
        }

        if (IP_ADDR_EQUAL(Address, Entry->nte_addr) &&
            (Entry->nte_flags & NTE_VALID) &&
            !((IP_ADDR_EQUAL(Address, NULL_IP_ADDR) && (Entry->nte_if->if_flags & IF_FLAGS_NOIPADDR)))) {
            *NTE = Entry;
            return DEST_LOCAL;    // For us, just return.
        }
    }

    // Go through the whole table: a broadcast on any other NTE of the
    // interface the packet arrived on also counts.
    for (Bucket = 0; Bucket < NET_TABLE_SIZE; Bucket++) {
        for (Entry = NewNetTableList[Bucket];
             Entry != NULL; Entry = Entry->nte_next) {

            if (Entry == ArrivalNTE || Entry->nte_if != ArrivalIF) {
                continue;
            }

            Type = IsBCastOnNTE(Address, Entry);
            if (Type != DEST_LOCAL) {
                *NTE = Entry;
                return Type;
            }
        }
    }

    // It's not a local address, see if it's loopback.
    if (IP_LOOPBACK(Address)) {
        *NTE = LoopNTE;
        return DEST_LOCAL;
    }

    // If it's a class D address and we're receiving multicasts, handle it
    // here.
    if (CLASSD_ADDR(Address)) {
        if (IGMPLevel != 0) {
            return DEST_REM_MCAST;
        }
        return DEST_INVALID;
    }

    // It's not local. It may be a net broadcast for a net of which we're
    // not a member; if so, return remote bcast. We can't check for subnet
    // broadcasts of subnets we're not a member of, since we make no
    // assumption that all subnets of a net share one mask, so we just use
    // the net mask. *NTE has not been modified on this path, so it is still
    // the NTE the packet arrived on.
    ClassMask = IPNetMask(Address);
    if ((ArrivalNTE->nte_flags & NTE_VALID) &&
        (IP_ADDR_EQUAL(Address, (Address & ClassMask) |
                       (ArrivalNTE->nte_if->if_bcast & ~ClassMask)))) {
        return DEST_REM_BCAST;
    }

    // If it's to the 0 address, or a Class E address, or has an all-zero
    // subnet and host part, it's invalid.
    if (IP_ADDR_EQUAL(Address, IP_ZERO_BCST) ||
        IP_ADDR_EQUAL(Address, (Address & ClassMask)) ||
        CLASSE_ADDR(Address)) {
        return DEST_INVALID;
    }

    // If we're DHCPing the interface on which this came in we'll accept this.
    // If it came in as a broadcast a check in IPRcv() will reject it. If it's
    // a unicast to us we'll pass it up.
    if (ArrivalNTE->nte_flags & NTE_DHCP) {
        ASSERT(!(ArrivalNTE->nte_flags & NTE_VALID));
        return DEST_LOCAL;
    }

    return DEST_REMOTE;
}

//** IsRouteICMP - Determine whether a route was learned via ICMP.
//
//  Used by Router Discovery to determine how we learned about the route. We
//  are not allowed to update or time out routes that were not learned via
//  ICMP. A route that does not exist yet is treated as ICMP, so that a new
//  entry gets added.
//
//  Input:  Dest                    - Destination to search for.
//          Mask                    - Mask for destination.
//          FirstHop                - FirstHop to Dest.
//          OutIF                   - Pointer to outgoing interface structure.
//
//  Returns: TRUE if learned via ICMP (or not present), FALSE otherwise.
//
uint
IsRouteICMP(IPAddr Dest, IPMask Mask, IPAddr FirstHop, Interface * OutIF)
{
    RouteTableEntry *ScratchRTE;    // Out-parameter of the lookup; not used.
    RouteTableEntry *Match;

    Match = FindSpecificRTE(Dest, Mask, FirstHop, OutIF, &ScratchRTE, FALSE);

    // Only an existing route with some other protocol is "not ICMP".
    if (Match != NULL && Match->rte_proto != IRE_PROTO_ICMP) {
        return (FALSE);
    }

    return (TRUE);
}

void
UpdateDeadGWState( )
{
    uint Active = 0;
    uint Configured = 0;
    RouteTableEntry* RTE;
    RTE = GetDefaultGWs(&RTE);
    while (RTE) {
        ++Configured;
        if (RTE->rte_flags & RTE_VALID)
            ++Active;
        RTE = RTE->rte_next;
    }
    DefGWActive = Active;
    DefGWConfigured = Configured;
}

//* ValidateDefaultGWs - Mark default gateways as valid.
//
//  Called to mark one or all of our default gateways as up. The caller
//  specifies the IP address of the one to mark as up, or NULL_IP_ADDR if
//  they're all supposed to be marked up. Independently of which gateways
//  get marked, every gateway on the list has its rte_todg/rte_fromdg
//  fields cleared and its RCE chain invalidated.
//
//  Input: Addr - IP address of G/W to mark as up, or NULL_IP_ADDR for all.
//
//  Returns: Count of gateways marked as up.
//
uint
ValidateDefaultGWs(IPAddr Addr)
{
    RouteTableEntry *Gateway;
    uint Revived = 0;
    // NOTE(review): presumably CTESystemUpTime() is in milliseconds, making
    // this a time in seconds - confirm.
    uint Now = CTESystemUpTime() / 1000L;

    for (Gateway = GetDefaultGWs(&Gateway);
         Gateway != NULL; Gateway = Gateway->rte_next) {

        // Revive a currently-invalid default route if it is the requested
        // gateway (or if all gateways were requested).
        if (Gateway->rte_mask == DEFAULT_MASK &&
            !(Gateway->rte_flags & RTE_VALID) &&
            (IP_ADDR_EQUAL(Addr, NULL_IP_ADDR) ||
             IP_ADDR_EQUAL(Addr, Gateway->rte_addr))) {

            Gateway->rte_flags |= RTE_VALID;
            Gateway->rte_valid = Now;

            Revived++;
        }

        Gateway->rte_todg = Gateway->rte_fromdg = NULL;

        // To ensure that RCEs get switched to a lower-metric gateway
        // if one exists, invalidate all RCEs on this RTE.
        InvalidateRCEChain(Gateway);
    }

    // NOTE(review): UpdateDeadGWState() recomputes DefGWActive from the
    // gateway list, so this increment is overwritten right away. It is
    // kept to preserve the existing behavior.
    DefGWActive += Revived;
    UpdateDeadGWState();

    return Revived;
}

//* InvalidateRCE - Invalidate an RCE.
//
//  Called to invalidate a single RCE. If the RCE is valid and not marked
//  link-deleted, it is charged off its RTE's RCE count, marked invalid,
//  repointed at the outgoing interface and unlinked from the RTE's RCE
//  list. If it is also connected and unused, the interface's invalidate
//  handler is called and any interface reference held by the RCE is
//  dropped. All of this happens with the RCE lock held.
//
//  Input:  CurrentRCE  - RCE to invalidate; may be NULL.
//
//  Returns: usecnt on the RCE (0 if CurrentRCE is NULL).
//
uint
InvalidateRCE(RouteCacheEntry * CurrentRCE)
{
    CTELockHandle LockHandle;       // Lock handle for RCE being updated.
    Interface *IF;
    RouteTableEntry *OwnerRTE;
    RouteCacheEntry *Walker;
    uint UseCount;

    if (CurrentRCE == NULL) {
        return 0;
    }

    CTEGetLock(&CurrentRCE->rce_lock, &LockHandle);

    UseCount = CurrentRCE->rce_usecnt;

    if ((CurrentRCE->rce_flags & RCE_VALID) &&
        !(CurrentRCE->rce_flags & RCE_LINK_DELETED)) {

        ASSERT(CurrentRCE->rce_rte != NULL);

        OwnerRTE = CurrentRCE->rce_rte;
        IF = OwnerRTE->rte_if;

        OwnerRTE->rte_rces -= CurrentRCE->rce_cnt;

        // An invalid RCE carries its interface in the rce_rte field.
        CurrentRCE->rce_flags &= ~RCE_VALID;
        CurrentRCE->rce_rte = (RouteTableEntry *) IF;

        if ((CurrentRCE->rce_flags & RCE_CONNECTED) && (UseCount == 0)) {

            (*(IF->if_invalidate)) (IF->if_lcontext, CurrentRCE);

            if (CurrentRCE->rce_flags & RCE_REFERENCED) {
                LockedDerefIF(IF);
                CurrentRCE->rce_flags &= ~RCE_REFERENCED;
            }
        }

        // Unlink it from the RTE's list. Treating the list head as the
        // rce_next field of a pseudo-entry lets the first element be
        // removed the same way as any other. Not finding the RCE on the
        // list is tolerated.
        for (Walker = STRUCT_OF(RouteCacheEntry, &OwnerRTE->rte_rcelist, rce_next);
             Walker != NULL; Walker = Walker->rce_next) {

            if (Walker->rce_next == CurrentRCE) {
                Walker->rce_next = CurrentRCE->rce_next;
                break;
            }
        }
    }

    CTEFreeLock(&CurrentRCE->rce_lock, LockHandle);

    return UseCount;
}

//* InvalidateRCEChain - Invalidate the RCEs on an RTE.
//
//  Called to invalidate the RCE chain on an RTE. The chain is detached
//  from the RTE first (rte_rcelist becomes NULL) and then each RCE on it
//  is invalidated under its own lock. The rce_next links of the detached
//  entries are left as they were. We assume the caller holds the route
//  table lock.
//
//  Input:  RTE                     - RTE on which to invalidate RCEs.
//
//  Returns: Nothing.
//
void
InvalidateRCEChain(RouteTableEntry * RTE)
{
    CTELockHandle LockHandle;       // Lock handle for RCE being updated.
    RouteCacheEntry *RCE, *NextRCE;
    Interface *IF = RTE->rte_if;

    // Take the whole chain off the RTE. We still hold the RouteTableLock,
    // so RCE closes can't happen.
    RCE = RTE->rte_rcelist;
    RTE->rte_rcelist = NULL;

    // Walk down the list, nuking each RCE.
    for (; RCE != NULL; RCE = NextRCE) {

        CTEGetLock(&RCE->rce_lock, &LockHandle);

        if (!(RCE->rce_flags & RCE_VALID) ||
            (RCE->rce_flags & RCE_LINK_DELETED)) {
            // Every RCE on an RTE's chain is expected to be valid and not
            // link-deleted.
            ASSERT(FALSE);
        } else {
            ASSERT(RCE->rce_rte == RTE);

            RTE->rte_rces -= RCE->rce_cnt;

            // An invalid RCE carries its interface in the rce_rte field.
            RCE->rce_flags &= ~RCE_VALID;
            RCE->rce_rte = (RouteTableEntry *) IF;

            if ((RCE->rce_flags & RCE_CONNECTED) &&
                RCE->rce_usecnt == 0) {

                (*(IF->if_invalidate)) (IF->if_lcontext, RCE);

                if (RCE->rce_flags & RCE_REFERENCED) {
                    LockedDerefIF(IF);
                    RCE->rce_flags &= ~RCE_REFERENCED;
                }
            }
        }

        // Pick up the successor while the RCE lock is still held.
        NextRCE = RCE->rce_next;
        CTEFreeLock(&RCE->rce_lock, LockHandle);
    }
}

//* InvalidateRCELinks - Invalidate the RCEs on RTE when the link goes away
//
//  Called to invalidate the RCE chain on an RTE whose link is going away.
//  Every RCE on the chain is invalidated via InvalidateRCEChain and is then
//  marked RCE_LINK_DELETED. We assume the caller holds the route table lock.
//
//  Input:  RTE                     - RTE on which to invalidate RCEs. Must
//                                    be on a P2MP interface and have a link.
//
//  Returns: Nothing.
//
void
InvalidateRCELinks(RouteTableEntry * RTE)
{
    CTELockHandle RCEHandle;    // Lock handle for RCE being updated.
    RouteCacheEntry *TempRCE, *CurrentRCE;
    Interface *OutIF;

    OutIF = RTE->rte_if;

    ASSERT(OutIF->if_flags & IF_FLAGS_P2MP);
    ASSERT(RTE->rte_link);

    // Remember the head of the chain BEFORE invalidating it.
    // InvalidateRCEChain detaches the chain by setting rte_rcelist to NULL,
    // so reading the head afterwards would always yield an empty list and
    // no RCE would ever get marked. The rce_next links of the detached
    // entries are left intact, and we still hold the RouteTableLock, so RCE
    // closes can't happen and the saved chain stays walkable.
    CurrentRCE = RTE->rte_rcelist;

    InvalidateRCEChain(RTE);

    RTE->rte_rcelist = NULL;

    // Walk down the saved list, flagging each RCE.
    while (CurrentRCE != NULL) {

        CTEGetLock(&CurrentRCE->rce_lock, &RCEHandle);

        // mark the RCE as link deleted so that this rce is not selected in iptransmit
        CurrentRCE->rce_flags |= RCE_LINK_DELETED;

        // Pick up the successor while the RCE lock is still held.
        TempRCE = CurrentRCE->rce_next;
        CTEFreeLock(&CurrentRCE->rce_lock, RCEHandle);
        CurrentRCE = TempRCE;
    }

}

//* GetNextHopForRTE - determines the next-hop address for a route.
//
//  Called when we need an actual next-hop for a route, typically so
//  we can pass it to an external client. A route whose rte_addr is
//  anything other than IPADDR_LOCAL reports rte_addr itself. For a local
//  route we ask BestNTEForIF for the source NTE on the route's interface
//  and report its address; if the interface has no NTEs, or no source NTE
//  is found, the route's destination is reported instead.
//
//  Entry:  RTE     - the entry whose next-hop is required
//
//  Returns: IPAddr containing the next-hop
//
IPAddr
GetNextHopForRTE(RouteTableEntry* RTE)
{
    Interface *IF;
    NetTableEntry *SrcNTE;

    // Not a local route: the stored first hop is the answer.
    if (!IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL)) {
        return RTE->rte_addr;
    }

    IF = RTE->rte_if;
    SrcNTE = BestNTEForIF(RTE->rte_dest, IF);

    if (IF->if_nte != NULL && SrcNTE != NULL) {
        return SrcNTE->nte_addr;
    }

    return RTE->rte_dest;
}

//** FindValidIFForRTE - Find a valid interface for an RTE.
//
//  Called when we're going to send a packet out a route that is currently
//  marked as disconnected. If a callout routine is registered we call it to
//  obtain the outgoing interface index, find the matching interface, and
//  point the RTE at it. If the route's first hop turns out to be one of the
//  valid local addresses on that interface, the route is converted into a
//  direct (local) route. This routine is called with the RouteTableLock
//  held.
//
//  Input:  RTE         - A pointer to the RTE for the route being used.
//          Destination - Destination IP address we're trying to reach.
//          Source      - Source IP address we're sending from.
//          Protocol    - Protocol type of packet that caused send.
//          Buffer      - Pointer to first part of packet that caused send.
//          Length      - Length of buffer.
//          HdrSrc      - Src Address in header
//
//      Returns: A pointer to the RTE, or NULL if that RTE couldn't be connected.
//
RouteTableEntry *
FindValidIFForRTE(RouteTableEntry * RTE, IPAddr Destination, IPAddr Source,
                  uchar Protocol, uchar * Buffer, uint Length, IPAddr HdrSrc)
{
    uint IfIndex;
    Interface *IF;
    NetTableEntry *Entry;

    // Without a callout there is nobody to ask.
    if (DODCallout == NULL) {
        return NULL;
    }

    IfIndex = (*DODCallout) (RTE->rte_context, Destination, Source,
                             Protocol, Buffer, Length, HdrSrc);

    // The callout couldn't map an interface index.
    if (IfIndex == INVALID_IF_INDEX) {
        return NULL;
    }

    // We got what should be a valid index. Walk our interface list and see
    // if we can find a matching interface structure.
    IF = IFList;
    while (IF != NULL && IF->if_index != IfIndex) {
        IF = IF->if_next;
    }

    // No such interface, or it has no NTEs on it.
    if ((IF == NULL) || !(IF->if_ntecount)) {
        return NULL;
    }

    if (RTE->rte_addr != IPADDR_LOCAL) {
        // See if the first hop of the route is a valid local address on
        // this new interface. If it is, mark the route as local.
        for (Entry = IF->if_nte; Entry != NULL; Entry = Entry->nte_ifnext) {

            if ((Entry->nte_flags & NTE_VALID) &&
                IP_ADDR_EQUAL(Entry->nte_addr, RTE->rte_addr)) {
                RTE->rte_addr = IPADDR_LOCAL;
                RTE->rte_type = IRE_TYPE_DIRECT;
                break;
            }
        }
    }

    // Set the RTE to the new interface, and mark him as valid.
    RTE->rte_if = IF;
    RTE->rte_flags |= RTE_IF_VALID;
    SortRoutesInDestByRTE(RTE);
    RTE->rte_mtu = IF->if_mtu - sizeof(IPHeader);

    return RTE;
}

//* GetRouteContext - Routine to get the route context for a specific route.
//
//  Called when we need to get the route context for a path, usually when we're
//  adding a route derived from an existing route. The route is looked up
//  with the route table lock held, and we return its context, or 0 if no
//  route is found.
//
//  Input:  Destination                     - Destination address of path.
//          Source                          - Source address of path.
//
//  Returns: A ROUTE_CONTEXT, or 0.
//
ROUTE_CONTEXT
GetRouteContext(IPAddr Destination, IPAddr Source)
{
    CTELockHandle LockHandle;
    RouteTableEntry *Match;
    ROUTE_CONTEXT Result = 0;

    CTEGetLock(&RouteTableLock.Lock, &LockHandle);

    Match = LookupRTE(Destination, Source, HOST_ROUTE_PRI, FALSE);
    if (Match != NULL) {
        Result = Match->rte_context;
    }

    CTEFreeLock(&RouteTableLock.Lock, LockHandle);

    return (Result);
}

//** LookupNextHop - Look up the next hop
//
//  Called when we need to find the next hop on our way to a destination. We
//  call LookupRTE under the route table lock and return the appropriate
//  information. NextHop and MTU are only written when a route is found.
//
//  In a PnP build, the interface is referenced here.
//
//  Entry:  Destination     - IP address we're trying to reach.
//          Src             - Source address of datagram being routed.
//          NextHop         - Pointer to IP address of next hop (returned).
//          MTU             - Pointer to where to return max MTU used on the
//                            route.
//
//  Returns: Pointer to outgoing interface if we found one, NULL otherwise.
//
Interface *
LookupNextHop(IPAddr Destination, IPAddr Src, IPAddr * NextHop, uint * MTU)
{
    CTELockHandle LockHandle;       // Lock handle for routing table.
    RouteTableEntry *Match;         // Route table entry for the route.
    Interface *OutIF = NULL;        // Stays NULL if no route is found.

    CTEGetLock(&RouteTableLock.Lock, &LockHandle);

    Match = LookupRTE(Destination, Src, HOST_ROUTE_PRI, FALSE);

    if (Match != (RouteTableEntry *) NULL) {
        OutIF = Match->rte_if;

        // If this is a direct route, send straight to the destination.
        if (IP_ADDR_EQUAL(Match->rte_addr, IPADDR_LOCAL)) {
            *NextHop = Destination;
        } else {
            *NextHop = Match->rte_addr;
        }

        // If the route is on a P2MP interface, get the mtu from the link
        // associated with the route.
        if (Match->rte_link) {
            *MTU = Match->rte_link->link_mtu;
        } else {
            *MTU = Match->rte_mtu;
        }

        // Reference the interface before the table lock is released.
        LOCKED_REFERENCE_IF(OutIF);
    }

    CTEFreeLock(&RouteTableLock.Lock, LockHandle);

    return OutIF;
}

//** LookupNextHopWithBuffer - Look up the next hop, with packet information.
//
//  Finds the next hop towards a destination when the caller also has the
//  packet that triggered the lookup. The route is found with LookupRTE; if
//  the route's interface is not currently valid (RTE_IF_VALID clear) the
//  packet information is handed to FindValidIFForRTE to try to obtain a
//  usable route.
//
//  On success the returned interface has been referenced, and the link
//  returned through Link (if any) has had its reference count incremented.
//
//  Entry:  Destination     - IP address we're trying to reach.
//          Src             - Source address of datagram being routed.
//          NextHop         - Pointer to IP address of next hop (returned).
//          MTU             - Pointer to where to return max MTU used on the
//                            route.
//          Protocol        - Protocol type for packet that's causing this
//                            lookup.
//          Buffer          - Pointer to first part of packet causing lookup.
//          Length          - Length of Buffer.
//          fwdRCE          - Optional. Receives the forwarding RCE embedded
//                            in the route that LookupRTE found.
//          Link            - Optional. Receives the link associated with the
//                            route (may be NULL).
//          HdrSrc          - Source address from header.
//          UnicastIf       - Iface to constrain lookup to, 0 if unconstrained.
//
//  Returns: Pointer to outgoing interface if we found one, NULL otherwise.
//
Interface *
LookupNextHopWithBuffer(IPAddr Destination, IPAddr Src, IPAddr *NextHop,
                        uint * MTU, uchar Protocol, uchar *Buffer, uint Length,
                        RouteCacheEntry **fwdRCE, LinkEntry **Link,
                        IPAddr HdrSrc, uint UnicastIf)
{
    CTELockHandle LockHandle;       // Saved lock state for the route table.
    RouteTableEntry *RTE;           // Route in use for this lookup.
    Interface *OutIF;               // Interface handed back to the caller.

    CTEGetLock(&RouteTableLock.Lock, &LockHandle);

    RTE = LookupRTE(Destination, Src, HOST_ROUTE_PRI, UnicastIf);
    if (RTE == NULL) {
        // Couldn't find a route.
        CTEFreeLock(&RouteTableLock.Lock, LockHandle);
        return NULL;
    }

    // Direct routes (first hop IPADDR_LOCAL) go straight to the
    // destination; everything else goes via the route's gateway.
    if (IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL)) {
        *NextHop = Destination;
    } else {
        *NextHop = RTE->rte_addr;
    }

    // Hand back the forwarding RCE that lives inside the RTE. Note that this
    // is taken from the route LookupRTE returned, before any replacement by
    // FindValidIFForRTE below.
    if (fwdRCE != NULL) {
#if REM_OPT
        *fwdRCE = IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL) ? NULL :
#else
        *fwdRCE =
#endif
            (RouteCacheEntry *) STRUCT_OF(RouteCacheEntry,
                                          &RTE->rte_arpcontext,
                                          rce_context);
    }

    // If the route's interface isn't valid, try to get one that is.
    if ((RTE->rte_flags & RTE_IF_VALID) == 0) {
        RTE = FindValidIFForRTE(RTE, Destination, Src, Protocol, Buffer,
                                Length, HdrSrc);
        if (RTE == NULL) {
            // Couldn't bring it up.
            CTEFreeLock(&RouteTableLock.Lock, LockHandle);
            return NULL;
        }
    }
    OutIF = RTE->rte_if;

    // A route with an associated link (P2MP interface) takes its MTU from
    // that link; otherwise use the MTU stored on the route.
    if (RTE->rte_link != NULL) {
        *MTU = RTE->rte_link->link_mtu;
    } else {
        *MTU = RTE->rte_mtu;
    }

    // Return the link, referenced, if the caller asked for it.
    if (Link != NULL) {
        *Link = RTE->rte_link;
        if (RTE->rte_link != NULL) {
            CTEInterlockedIncrementLong(&RTE->rte_link->link_refcount);
        }
    }

    LOCKED_REFERENCE_IF(OutIF);
    CTEFreeLock(&RouteTableLock.Lock, LockHandle);
    return OutIF;
}

//** LookupForwardingNextHop - Look up the next hop on which to forward a packet.
//
//  Finds the next hop for a packet being forwarded. The route is found with
//  LookupForwardRTE; if the route's interface is not currently valid
//  (RTE_IF_VALID clear) the packet information is handed to
//  FindValidIFForRTE to try to obtain a usable route.
//
//  On success the returned interface has been referenced, and the link
//  returned through Link (if any) has had its reference count incremented.
//
//  Entry:  Destination     - IP address we're trying to reach.
//          Src             - Source address of datagram being routed.
//          NextHop         - Pointer to IP address of next hop (returned).
//          MTU             - Pointer to where to return max MTU used on the
//                            route.
//          Protocol        - Protocol type for packet that's causing this
//                            lookup.
//          Buffer          - Pointer to first part of packet causing lookup.
//          Length          - Length of Buffer.
//          fwdRCE          - Optional. Receives the forwarding RCE embedded
//                            in the route that LookupForwardRTE found.
//          Link            - Optional. Receives the link associated with the
//                            route (may be NULL).
//          HdrSrc          - Source address from header.
//
//  Returns: Pointer to outgoing interface if we found one, NULL otherwise.
//
Interface *
LookupForwardingNextHop(IPAddr Destination, IPAddr Src, IPAddr *NextHop,
                        uint * MTU, uchar Protocol, uchar *Buffer, uint Length,
                        RouteCacheEntry **fwdRCE, LinkEntry **Link,
                        IPAddr HdrSrc)
{
    CTELockHandle LockHandle;       // Saved lock state for the route table.
    RouteTableEntry *RTE;           // Route in use for this lookup.
    Interface *OutIF;               // Interface handed back to the caller.

    CTEGetLock(&RouteTableLock.Lock, &LockHandle);

    RTE = LookupForwardRTE(Destination, Src, TRUE);
    if (RTE == NULL) {
        // Couldn't find a route.
        CTEFreeLock(&RouteTableLock.Lock, LockHandle);
        return NULL;
    }

    // Direct routes (first hop IPADDR_LOCAL) go straight to the
    // destination; everything else goes via the route's gateway.
    if (IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL)) {
        *NextHop = Destination;
    } else {
        *NextHop = RTE->rte_addr;
    }

    // Hand back the forwarding RCE that lives inside the RTE. Note that this
    // is taken from the route LookupForwardRTE returned, before any
    // replacement by FindValidIFForRTE below.
    if (fwdRCE != NULL) {
#if REM_OPT
        *fwdRCE = IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL) ? NULL :
#else
        *fwdRCE =
#endif
            (RouteCacheEntry *) STRUCT_OF(RouteCacheEntry,
                                          &RTE->rte_arpcontext,
                                          rce_context);
    }

    // If the route's interface isn't valid, try to get one that is.
    if ((RTE->rte_flags & RTE_IF_VALID) == 0) {
        RTE = FindValidIFForRTE(RTE, Destination, Src, Protocol, Buffer,
                                Length, HdrSrc);
        if (RTE == NULL) {
            // Couldn't bring it up.
            CTEFreeLock(&RouteTableLock.Lock, LockHandle);
            return NULL;
        }
    }
    OutIF = RTE->rte_if;

    // A route with an associated link (P2MP interface) takes its MTU from
    // that link; otherwise use the MTU stored on the route.
    if (RTE->rte_link != NULL) {
        *MTU = RTE->rte_link->link_mtu;
    } else {
        *MTU = RTE->rte_mtu;
    }

    // Return the link, referenced, if the caller asked for it.
    if (Link != NULL) {
        *Link = RTE->rte_link;
        if (RTE->rte_link != NULL) {
            CTEInterlockedIncrementLong(&RTE->rte_link->link_refcount);
        }
    }

    LOCKED_REFERENCE_IF(OutIF);
    CTEFreeLock(&RouteTableLock.Lock, LockHandle);
    return OutIF;
}

//* RTReadNext - Read the next route in the table.
//
//  Called by the GetInfo code to read the next route in the table. We assume
//  the context passed in is valid, and the caller has the RouteTableLock.
//
//  Input:  Context     - Pointer to a RouteEntryContext.
//          Buffer      - Pointer to an IPRouteEntry structure.
//
//  Returns: TRUE if more data is available to be read, FALSE is not.
//
uint
RTReadNext(void *Context, void *Buffer)
{
    RouteEntryContext *REContext = (RouteEntryContext *) Context;
    IPRouteEntry *IPREntry = (IPRouteEntry *) Buffer;
    RouteTableEntry *CurrentRTE=NULL;
    uint i;
    uint Now = CTESystemUpTime() / 1000L;
    Interface *IF;
    NetTableEntry *SrcNTE;

    UINT retVal = GetNextRoute(Context, &CurrentRTE);

    // Should always have the rte because we don't have empty route tables.
    //
    ASSERT(CurrentRTE);

    // Fill in the buffer.
    IF = CurrentRTE->rte_if;

    IPREntry->ire_dest = CurrentRTE->rte_dest;
    IPREntry->ire_index = IF->if_index;
    IPREntry->ire_metric1 = CurrentRTE->rte_metric;
    IPREntry->ire_metric2 = IRE_METRIC_UNUSED;
    IPREntry->ire_metric3 = IRE_METRIC_UNUSED;
    IPREntry->ire_metric4 = IRE_METRIC_UNUSED;
    IPREntry->ire_metric5 = IRE_METRIC_UNUSED;
    IPREntry->ire_nexthop = GetNextHopForRTE(CurrentRTE);
    IPREntry->ire_type = (CurrentRTE->rte_flags & RTE_VALID ?
                          CurrentRTE->rte_type : IRE_TYPE_INVALID);
    IPREntry->ire_proto = CurrentRTE->rte_proto;
    IPREntry->ire_age = Now - CurrentRTE->rte_valid;
    IPREntry->ire_mask = CurrentRTE->rte_mask;
    IPREntry->ire_context = CurrentRTE->rte_context;

    return retVal;
}

//* RTRead - Read the next route in the table.
//
//  Called by the GetInfo code to read the next route in the table. We assume
//  the context passed in is valid, and the caller has the RouteTableLock.
//
//  Input:  Context     - Pointer to a RouteEntryContext.
//          Buffer      - Pointer to an IPRouteEntry structure.
//
//  Returns:
//

//* RtRead -  Read a route
//
//  Returns: Status of attempt to add route.
//
uint
RTRead(void *pContext, void *pBuffer)
{
    IPRouteLookupData *pRLData = (IPRouteLookupData *) pContext;
    IPRouteEntry *pIPREntry = (IPRouteEntry *) pBuffer;
    RouteTableEntry *pCurrentRTE;
    uint i;
    uint Now = CTESystemUpTime() / 1000L;
    Interface *pIF;
    NetTableEntry *pSrcNTE;

    ASSERT((pContext != NULL) && (pBuffer != NULL));
    pCurrentRTE = LookupRTE(pRLData->DestAdd, pRLData->SrcAdd,
                            HOST_ROUTE_PRI, FALSE);

    if (pCurrentRTE == NULL) {
        pIPREntry->ire_index = 0xffffffff;
        return TDI_DEST_HOST_UNREACH;
    }
    // Fill in the buffer.
    pIF = pCurrentRTE->rte_if;

    pIPREntry->ire_dest = pCurrentRTE->rte_dest;
    pIPREntry->ire_index = pIF->if_index;
    pIPREntry->ire_metric1 = pCurrentRTE->rte_metric;
    pIPREntry->ire_metric2 = IRE_METRIC_UNUSED;
    pIPREntry->ire_metric3 = IRE_METRIC_UNUSED;
    pIPREntry->ire_metric4 = IRE_METRIC_UNUSED;
    pIPREntry->ire_metric5 = IRE_METRIC_UNUSED;
    pIPREntry->ire_nexthop = GetNextHopForRTE(pCurrentRTE);
    pIPREntry->ire_type = (pCurrentRTE->rte_flags & RTE_VALID ?
                           pCurrentRTE->rte_type : IRE_TYPE_INVALID);
    pIPREntry->ire_proto = pCurrentRTE->rte_proto;
    pIPREntry->ire_age = Now - pCurrentRTE->rte_valid;
    pIPREntry->ire_mask = pCurrentRTE->rte_mask;
    pIPREntry->ire_context = pCurrentRTE->rte_context;
    return TDI_SUCCESS;
}

//* LookupRoute - Look up a route while holding the route table lock.
//
//  Acquires the RouteTableLock, calls RTRead to fill in the caller's
//  IPRouteEntry, and releases the lock. The status returned by RTRead is
//  not propagated.
//
//  Input:  pRLData     - Lookup data passed to RTRead.
//          pIpRTE      - Route entry passed to RTRead to be filled in.
//
//  Returns: Nothing.
//
void
LookupRoute(IPRouteLookupData * pRLData, IPRouteEntry * pIpRTE)
{
    CTELockHandle TableHandle;

    CTEGetLock(&RouteTableLock.Lock, &TableHandle);
    RTRead(pRLData, pIpRTE);
    CTEFreeLock(&RouteTableLock.Lock, TableHandle);
}

//* LookupRouteInformation - Look up route information without a packet.
//
//  Convenience form of LookupRouteInformationWithBuffer that supplies no
//  packet buffer (NULL buffer, zero length). All other arguments are passed
//  through unchanged.
//
//  Returns: Whatever LookupRouteInformationWithBuffer returns.
//
NTSTATUS
LookupRouteInformation(void *pRouteLookupData, void *pIpRTE,
                       IPROUTEINFOCLASS RouteInfoClass, void *RouteInformation,
                       uint * RouteInfoLength)
{
    NTSTATUS LookupStatus;

    LookupStatus = LookupRouteInformationWithBuffer(pRouteLookupData,
                                                    NULL,
                                                    0,
                                                    pIpRTE,
                                                    RouteInfoClass,
                                                    RouteInformation,
                                                    RouteInfoLength);
    return LookupStatus;
}

//* LookupRouteInformationWithBuffer - Look up a route and extra information.
//
//  Looks up the route for the destination/source pair in pRouteLookupData
//  under the RouteTableLock. If the route's interface is not valid
//  (RTE_IF_VALID clear), the packet information is handed to
//  FindValidIFForRTE and the call completes without filling in any output.
//  Otherwise the optional route entry is filled in and, depending on
//  RouteInfoClass, RouteInformation/RouteInfoLength are written.
//
//  Input:  pRouteLookupData - Pointer to an IPRouteLookupData structure.
//          Buffer           - Pointer to packet data passed to
//                             FindValidIFForRTE (may be NULL).
//          Length           - Length of Buffer.
//          pIpRTE           - Optional pointer to an IPRouteEntry to fill in.
//          RouteInfoClass   - Which extra information to return.
//          RouteInformation - Where to return the extra information.
//          RouteInfoLength  - Where to return the length of that information.
//
//  Returns: STATUS_SUCCESS if the route was found with a valid interface.
//           STATUS_PENDING if the interface was not valid and
//           FindValidIFForRTE returned a route.
//           STATUS_UNSUCCESSFUL if no route was found, or FindValidIFForRTE
//           returned NULL.
//
NTSTATUS
LookupRouteInformationWithBuffer(void *pRouteLookupData, uchar * Buffer,
                                 uint Length, void *pIpRTE,
                                 IPROUTEINFOCLASS RouteInfoClass,
                                 void *RouteInformation, uint * RouteInfoLength)
{

    IPRouteLookupData *pRLData = (IPRouteLookupData *) pRouteLookupData;
    IPRouteEntry *pIPREntry = (IPRouteEntry *) pIpRTE;
    RouteTableEntry *pCurrentRTE;
    uint Now = CTESystemUpTime() / 1000L;   // Current up time, in seconds.
    Interface *pIF;
    CTELockHandle Handle;

    CTEGetLock(&RouteTableLock.Lock, &Handle);

    ASSERT(pRouteLookupData != NULL);
    pCurrentRTE = LookupRTE(pRLData->DestAdd, pRLData->SrcAdd, HOST_ROUTE_PRI, FALSE);

    if (pCurrentRTE == NULL) {
        CTEFreeLock(&RouteTableLock.Lock, Handle);
        return STATUS_UNSUCCESSFUL;
    }
    // see if the RTE is for a demand-dial route,
    if (!(pCurrentRTE->rte_flags & RTE_IF_VALID)) {
        // The lookup source address doubles as the header source here.
        pCurrentRTE = FindValidIFForRTE(pCurrentRTE, pRLData->DestAdd,
                                        pRLData->SrcAdd, pRLData->Info[0],
                                        Buffer, Length, pRLData->SrcAdd);
        CTEFreeLock(&RouteTableLock.Lock, Handle);
        if (pCurrentRTE == NULL) {
            // Couldn't bring it up.
            return STATUS_UNSUCCESSFUL;
        }
        return STATUS_PENDING;
    }
    // Fill in the buffer.
    pIF = pCurrentRTE->rte_if;

    if (pIPREntry) {
        pIPREntry->ire_dest = pCurrentRTE->rte_dest;
        pIPREntry->ire_index = pIF->if_index;
        pIPREntry->ire_metric1 = pCurrentRTE->rte_metric;
        pIPREntry->ire_metric2 = IRE_METRIC_UNUSED;
        pIPREntry->ire_metric3 = IRE_METRIC_UNUSED;
        pIPREntry->ire_metric4 = IRE_METRIC_UNUSED;
        pIPREntry->ire_metric5 = IRE_METRIC_UNUSED;
        pIPREntry->ire_nexthop = GetNextHopForRTE(pCurrentRTE);
        pIPREntry->ire_type = (pCurrentRTE->rte_flags & RTE_VALID ?
                               pCurrentRTE->rte_type : IRE_TYPE_INVALID);
        pIPREntry->ire_proto = pCurrentRTE->rte_proto;
        pIPREntry->ire_age = Now - pCurrentRTE->rte_valid;
        pIPREntry->ire_mask = pCurrentRTE->rte_mask;
        pIPREntry->ire_context = pCurrentRTE->rte_context;
    }
    switch (RouteInfoClass) {
    case IPRouteOutgoingFirewallContext:
        // NOTE(review): a ULONG interface index is stored, but the length
        // reported is sizeof(PVOID); these differ on 64-bit builds. Left
        // unchanged because callers may depend on it - confirm.
        *(PULONG) RouteInformation = pIF->if_index;
        *(PULONG) RouteInfoLength = sizeof(PVOID);
        break;

    case IPRouteOutgoingFilterContext:
        *(PVOID *) RouteInformation = NULL;
        *(PULONG) RouteInfoLength = sizeof(PVOID);
        break;

    default:
        // Any other class: nothing extra is returned, and the caller's
        // RouteInformation/RouteInfoLength are left untouched.
        break;
    }

    CTEFreeLock(&RouteTableLock.Lock, Handle);
    return STATUS_SUCCESS;
}

//* DeleteRTE - Delete an RTE.
//
//  Called when we need to delete an RTE. We assume the caller has the
//  RouteTableLock. We update the route count and the default gateway
//  bookkeeping, clear any dead-gateway links that point at the RTE,
//  invalidate its RCEs (including the forwarding RCE embedded in the RTE),
//  and hand the RTE to FreeRoute.
//
//  Input:  PrevRTE     - RTE in 'front' of one being deleted. Not used by
//                        this routine.
//          RTE         - RTE to be deleted.
//
//  Returns: Nothing.
//
void
DeleteRTE(RouteTableEntry * PrevRTE, RouteTableEntry * RTE)
{
    RouteTableEntry *GwRTE = NULL;      // Cursor over the default gateways.

    IPSInfo.ipsi_numroutes--;

    // Default routes carry extra accounting.
    if (RTE->rte_mask == DEFAULT_MASK) {
        DefGWConfigured--;
        if (RTE->rte_flags & RTE_VALID) {
            DefGWActive--;
        }
        UpdateDeadGWState();
        if (DefGWActive == 0) {
            ValidateDefaultGWs(NULL_IP_ADDR);
        }
    }

    // Unhook this RTE from its dead-gateway switchover partners.
    if (RTE->rte_todg != NULL) {
        RTE->rte_todg->rte_fromdg = NULL;
    }
    if (RTE->rte_fromdg != NULL) {
        RTE->rte_fromdg->rte_todg = NULL;
    }

    // Make sure no default gateway still names this RTE as its target.
    for (GwRTE = GetDefaultGWs(&GwRTE);
         GwRTE != NULL;
         GwRTE = GwRTE->rte_next) {
        if (GwRTE->rte_todg == RTE) {
            GwRTE->rte_todg = NULL;
        }
    }

    InvalidateRCEChain(RTE);

    // Make sure RTE's IF is valid
    ASSERT(RTE->rte_if != NULL);

    // Invalidate the forwarding RCE, unless the route sits on the dummy
    // interface.
    if (RTE->rte_if != (Interface *) & DummyInterface) {
        (*(RTE->rte_if->if_invalidate)) (
            RTE->rte_if->if_lcontext,
            (RouteCacheEntry *) STRUCT_OF(RouteCacheEntry,
                                          &RTE->rte_arpcontext,
                                          rce_context));
    }

    // Free the old route.
    FreeRoute(RTE);
}

//* DeleteRTEOnIF - Delete all address-dependent RTEs on a particular IF.
//
//  A function called by RTWalk when we want to delete all RTEs on a
//  particular interface, except the one whose destination is the interface's
//  broadcast address (if_bcast). An RTE on the interface with any other
//  destination is selected for deletion.
//
//  Input:  RTE             - RTE to check.
//          Context         - Interface on which we're deleting.
//          Context1        - Unused.
//
//  Returns: FALSE if we want to delete it, TRUE otherwise.
//
uint
DeleteRTEOnIF(RouteTableEntry * RTE, void *Context, void *Context1)
{
    Interface *TargetIF = (Interface *) Context;

    // Routes on other interfaces are left alone.
    if (RTE->rte_if != TargetIF) {
        return TRUE;
    }

    // So is the route to the interface's broadcast address.
    if (IP_ADDR_EQUAL(RTE->rte_dest, TargetIF->if_bcast)) {
        return TRUE;
    }

    return FALSE;
}

//* DeleteAllRTEOnIF - Delete all RTEs on a particular IF.
//
//  A function called by RTWalk when we want to delete every RTE on a
//  particular interface. An RTE is selected for deletion exactly when its
//  interface matches the one passed in Context.
//
//  Input:  RTE             - RTE to check.
//          Context         - Interface on which we're deleting.
//          Context1        - Unused.
//
//  Returns: FALSE if we want to delete it, TRUE otherwise.
//
uint
DeleteAllRTEOnIF(RouteTableEntry * RTE, void *Context, void *Context1)
{
    Interface *TargetIF = (Interface *) Context;

    return (RTE->rte_if == TargetIF) ? FALSE : TRUE;
}


//* InvalidateRCEOnIF - Invalidate all RCEs on a particular IF.
//
//  A function called by RTWalk when we want to invalidate all RCEs on a
//  particular interface. For each RTE whose interface matches, we call
//  InvalidateRCEChain. No RTE is ever selected for deletion.
//
//  Input:  RTE             - RTE to check.
//          Context         - Interface on which we're invalidating.
//          Context1        - Unused.
//
//  Returns: TRUE.
//
uint
InvalidateRCEOnIF(RouteTableEntry * RTE, void *Context, void *Context1)
{
    if (RTE->rte_if == (Interface *) Context) {
        InvalidateRCEChain(RTE);
    }

    return TRUE;
}


//* SetMTUOnIF - Set the MTU on an interface.
//
//  An RTWalk-style callback that stores a new MTU in every RTE that uses
//  the given interface. No RTE is ever selected for deletion.
//
//  Input:  RTE             - RTE to check.
//          Context         - Pointer to the interface whose routes are to be
//                            updated.
//          Context1        - Pointer to the new MTU.
//
//  Returns: TRUE.
//
uint
SetMTUOnIF(RouteTableEntry * RTE, void *Context, void *Context1)
{
    Interface *TargetIF = (Interface *) Context;
    uint MTUValue = *(uint *) Context1;

    if (RTE->rte_if == TargetIF) {
        RTE->rte_mtu = MTUValue;
    }

    return TRUE;
}

//* SetMTUToAddr - Set the MTU to a specific address.
//
//  An RTWalk-style callback that stores a new MTU in every RTE whose first
//  hop (rte_addr) equals the given address. No RTE is ever selected for
//  deletion.
//
//  Input:  RTE             - RTE to check.
//          Context         - Pointer to the first-hop IP address to match.
//          Context1        - Pointer to the new MTU.
//
//  Returns: TRUE.
//
uint
SetMTUToAddr(RouteTableEntry * RTE, void *Context, void *Context1)
{
    IPAddr FirstHop = *(IPAddr *) Context;
    uint MTUValue = *(uint *) Context1;

    if (IP_ADDR_EQUAL(RTE->rte_addr, FirstHop)) {
        RTE->rte_mtu = MTUValue;
    }

    return TRUE;
}

//** FreeRtChangeList - Frees a route-change notification list.
//
//  Walks a singly-linked list of route-change notification entries (linked
//  through rt_next) and releases each entry with CTEFreeMem. No
//  notifications are issued for the entries being freed.
//
//  Entry:  CurrentRtChangeList - Head of the list to be freed (may be NULL).
//
//  Returns: Nothing.
//
void
FreeRtChangeList(RtChangeList* CurrentRtChangeList)
{
    while (CurrentRtChangeList != NULL) {
        RtChangeList *Doomed = CurrentRtChangeList;

        // Step past the entry before its memory goes away.
        CurrentRtChangeList = Doomed->rt_next;
        CTEFreeMem(Doomed);
    }
}

//* RTWalk - Routine to walk the route table.
//
//  This routine walks the route table, calling the specified function
//  for each entry. If the called function returns FALSE, the RTE is
//  deleted.
//
//  Input:  CallFunc    - Function to call for each entry.
//          Context     - Context value to pass to each call.
//
//  Returns: Nothing.
//
void
RTWalk(uint(*CallFunc) (struct RouteTableEntry *, void *, void *),
       void *Context, void *Context1)
{
    uint            i;
    CTELockHandle   Handle;
    RouteTableEntry *RTE, *PrevRTE;
    RouteTableEntry *pOldBestRTE, *pNewBestRTE;
    UINT            IsDataLeft, IsValid;
    UCHAR           IteratorContext[CONTEXT_SIZE];
    RtChangeList    *CurrentRtChangeList = NULL;

    CTEGetLock(&RouteTableLock.Lock, &Handle);

    // Zero the context the first time it is used
    RtlZeroMemory(IteratorContext, CONTEXT_SIZE);

    // Do we have any routes in the table ?
    IsDataLeft = RTValidateContext(IteratorContext, &IsValid);

    if (IsDataLeft) {
        // Get the first route in the table
        IsDataLeft = GetNextRoute(IteratorContext, &RTE);

        while (IsDataLeft) {
            // Keep copy of current route and advance to next
            PrevRTE = RTE;

            // Read next route, before operating on current
            IsDataLeft = GetNextRoute(IteratorContext, &RTE);

            // Work on current route (already got next one)
            if (!(*CallFunc) (PrevRTE, Context, Context1)) {
                IPRouteNotifyOutput RNO = {0};
                RtChangeList        *NewRtChange;

                // Retrieve information about the route for change-notification
                // before proceeding with deletion.

                RNO.irno_dest = PrevRTE->rte_dest;
                RNO.irno_mask = PrevRTE->rte_mask;
                RNO.irno_nexthop = GetNextHopForRTE(PrevRTE);
                RNO.irno_proto = PrevRTE->rte_proto;
                RNO.irno_ifindex = PrevRTE->rte_if->if_index;
                RNO.irno_metric = PrevRTE->rte_metric;
                RNO.irno_flags = IRNO_FLAG_DELETE;

                // Delete the route and perform cleanup.

                DelRoute(PrevRTE->rte_dest, PrevRTE->rte_mask,
                         PrevRTE->rte_addr, PrevRTE->rte_if, MATCH_FULL,
                         &PrevRTE, &pOldBestRTE, &pNewBestRTE);

                CleanupP2MP_RTE(PrevRTE);
                CleanupRTE(PrevRTE);

                // Allocate, initialize and queue a change-notification entry
                // for the deleted route.

                NewRtChange = CTEAllocMemNBoot(sizeof(RtChangeList), '9iCT');
                if (NewRtChange != NULL) {
                    NewRtChange->rt_next = CurrentRtChangeList;
                    NewRtChange->rt_info = RNO;
                    CurrentRtChangeList = NewRtChange;
                }

#if FFP_SUPPORT
                FFPFlushRequired = TRUE;
#endif
            }
        }

        // Work on last route [it was not processed in the loop]
        PrevRTE = RTE;

        if (!(*CallFunc) (PrevRTE, Context, Context1)) {

            IPRouteNotifyOutput RNO = {0};
            RtChangeList        *NewRtChange;

            // Retrieve information about the route for change-notification
            // before proceeding with deletion.

            RNO.irno_dest = PrevRTE->rte_dest;
            RNO.irno_mask = PrevRTE->rte_mask;
            RNO.irno_nexthop = GetNextHopForRTE(PrevRTE);
            RNO.irno_proto = PrevRTE->rte_proto;
            RNO.irno_ifindex = PrevRTE->rte_if->if_index;
            RNO.irno_metric = PrevRTE->rte_metric;
            RNO.irno_flags = IRNO_FLAG_DELETE;

            // Delete the route and perform cleanup.

            DelRoute(PrevRTE->rte_dest, PrevRTE->rte_mask, PrevRTE->rte_addr,
                     PrevRTE->rte_if, MATCH_FULL, &PrevRTE, &pOldBestRTE,
                     &pNewBestRTE);

            CleanupP2MP_RTE(PrevRTE);
            CleanupRTE(PrevRTE);

            // Allocate, initialize and queue a change-notification entry
            // for the deleted route.

            NewRtChange = CTEAllocMemNBoot(sizeof(RtChangeList), '0iCT');
            if (NewRtChange != NULL) {
                NewRtChange->rt_next = CurrentRtChangeList;
                NewRtChange->rt_info = RNO;
                CurrentRtChangeList = NewRtChange;
            }

#if FFP_SUPPORT
            FFPFlushRequired = TRUE;
#endif
        }
    }

    CTEFreeLock(&RouteTableLock.Lock, Handle);

    // Call RtChangeNotify for each of the entries in the change-notification
    // list that we've built up so far. In the process, free each entry.

    if (CurrentRtChangeList) {
        RtChangeList    *TmpRtChangeList;

        do {
            TmpRtChangeList = CurrentRtChangeList->rt_next;
            RtChangeNotify(&CurrentRtChangeList->rt_info);
            CTEFreeMem(CurrentRtChangeList);
            CurrentRtChangeList = TmpRtChangeList;
        } while(CurrentRtChangeList);
    }
}

//** AttachRCEToNewRTE - Move RCEs over to a new RTE.
//
//  Invalidates one RCE, or every RCE on OldRTE's list, and tries to link
//  each one onto NewRTE. An RCE is moved only if InvalidateRCE reports a
//  use count of zero and the RCE's source address matches a valid NTE on
//  NewRTE's interface. An RCE that is still in use is flagged RCE_DEADGW
//  instead and left where it is.
//
//  NOTE(review): no lock other than each RCE's own lock is taken here
//  (TableHandle is unused); presumably the caller holds the RouteTableLock
//  - confirm against callers.
//
//  Entry:  NewRTE          - RTE the RCEs are to be attached to.
//          RCE             - Single RCE to move, or NULL to move every RCE
//                            on OldRTE's list.
//          OldRTE          - RTE whose RCE list is walked when RCE is NULL.
//
//  Returns: FALSE if an RCE was linked onto NewRTE while NewRTE's interface
//           was not valid (RTE_IF_VALID clear), 1 otherwise.
//
uint
AttachRCEToNewRTE(RouteTableEntry *NewRTE, RouteCacheEntry *RCE,
                  RouteTableEntry *OldRTE)
{
    CTELockHandle TableHandle, RCEHandle;
    RouteCacheEntry *tempRCE, *CurrentRCE;
    NetTableEntry *NTE;
    uint Status = 1;
    uint RCE_usecnt;

    // Start with the caller's RCE if one was given, otherwise with the head
    // of the old RTE's list.
    if (RCE == NULL) {
        CurrentRCE = OldRTE->rte_rcelist;

    } else {
        CurrentRCE = RCE;
    }

    // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"AttachRCETonewRTE %x %x %x\n", NewRTE, RCE, OldRTE));

    // OldRTE = RCE->rce_rte;

    //associate all the RCEs with this RTE

    while (CurrentRCE != NULL) {

        // The value returned by InvalidateRCE is treated as the RCE's use
        // count.
        RCE_usecnt = InvalidateRCE(CurrentRCE);

        CTEGetLock(&CurrentRCE->rce_lock, &RCEHandle);

        // Remember the successor now; rce_next is overwritten below if the
        // RCE gets linked onto NewRTE.
        tempRCE = CurrentRCE->rce_next;

        // if no one is using this go ahead and
        // mark this as valid

        if (RCE_usecnt == 0) {

            //Make sure that the src address for RCE is valid
            //for this RTE

            NTE = NewRTE->rte_if->if_nte;

            while (NTE) {

                if ((NTE->nte_flags & NTE_VALID) &&
                    IP_ADDR_EQUAL(CurrentRCE->rce_src, NTE->nte_addr))
                    break;
                NTE = NTE->nte_ifnext;
            }

            if (NTE != NULL) {

                if (CurrentRCE->rce_flags & RCE_CONNECTED) {
                    // NOTE(review): rce_rte is reinterpreted as an Interface
                    // pointer here; presumably InvalidateRCE leaves the
                    // interface pointer in rce_rte for a connected RCE -
                    // confirm.
                    Interface *IF = (Interface*)CurrentRCE->rce_rte;
                    (*(IF->if_invalidate))(IF->if_lcontext, CurrentRCE);
                    // Give back the interface reference held by the RCE.
                    if (CurrentRCE->rce_flags & RCE_REFERENCED) {
                        LockedDerefIF(IF);
                        CurrentRCE->rce_flags &= ~RCE_REFERENCED;
                    }
                } else {
                    ASSERT(!(CurrentRCE->rce_flags & RCE_REFERENCED));
                }

                // Link the RCE on the RTE, and set up the back pointer.
                CurrentRCE->rce_rte = NewRTE;
                CurrentRCE->rce_flags |= RCE_VALID;
                CurrentRCE->rce_next = NewRTE->rte_rcelist;
                NewRTE->rte_rcelist = CurrentRCE;

                NewRTE->rte_rces += CurrentRCE->rce_cnt;

                if ((NewRTE->rte_flags & RTE_IF_VALID)) {

                    // The new RTE's interface is valid: mark the RCE as
                    // connected and take an interface reference for it.
                    CurrentRCE->rce_flags |= (RCE_CONNECTED | RCE_REFERENCED);
                    LOCKED_REFERENCE_IF(NewRTE->rte_if);
                } else {

                    // Not expected: the new RTE has no valid interface.
                    ASSERT(FALSE);
                    CurrentRCE->rce_flags &= ~RCE_CONNECTED;
                    Status = FALSE;
                }

            }                    //if NTE!=NULL

        } else {

            // In use. Mark it as in dead gw transit mode
            // so that attachtorte will do the right thing

            // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"AttachRCETonewRTE RCE busy\n"));
            // CurrentRCE->rce_rte = NewRTE;

            CurrentRCE->rce_flags |= RCE_DEADGW;

        }                        //in use

        CTEFreeLock(&CurrentRCE->rce_lock, RCEHandle);

        //if there is only one RCE to be switched, break.

        if (RCE)
            break;

        CurrentRCE = tempRCE;

    }                            //while

    return (Status);
}

//** AttachRCEToRTE - Attach an RCE to an RTE.
//
//  This procedure takes an RCE, finds the appropriate RTE, and attaches it.
//  We check to make sure that the source address is still valid. The
//  RouteTableLock is taken for the whole operation, and the RCE's own lock
//  is taken inside it while the RCE is examined and updated.
//
//  The RCE is only linked or connected when its use count is zero. If it is
//  still in use nothing is changed and TRUE is returned.
//
//  Entry:  RCE             - RCE to be attached.
//          Protocol        - Protocol type for packet causing this call.
//          Buffer          - Pointer to buffer for packet causing this
//                            call.
//          Length          - Length of buffer.
//
//  Returns: TRUE if we attach it (or the RCE is still in use), FALSE if the
//           source address is no longer valid, no route exists, or a valid
//           interface could not be obtained.
//
uint
AttachRCEToRTE(RouteCacheEntry *RCE, uchar Protocol, uchar *Buffer, uint Length)
{
    CTELockHandle TableHandle, RCEHandle;
    RouteTableEntry *RTE;
    NetTableEntry *NTE;
    uint Status;
    NetTableEntry *NetTableList;

    CTEGetLock(&RouteTableLock.Lock, &TableHandle);

    // Look for a valid NTE whose address matches the RCE's source address,
    // in the hash bucket for that address.
    NetTableList = NewNetTableList[NET_TABLE_HASH(RCE->rce_src)];
    for (NTE = NetTableList; NTE != NULL; NTE = NTE->nte_next)
        if ((NTE->nte_flags & NTE_VALID) &&
            IP_ADDR_EQUAL(RCE->rce_src, NTE->nte_addr))
            break;

    if (NTE == NULL) {
        // Didn't find a match.
        CTEFreeLock(&RouteTableLock.Lock, TableHandle);
        return FALSE;
    }
    // Reuse the RTE the RCE already points at when the RCE's flags are
    // exactly RCE_VALID; otherwise do a fresh route lookup.
    // NOTE(review): both tests compare the whole flags word with ==/!=
    // rather than testing bits with &; confirm this is intended.
    if ((RCE->rce_flags == RCE_VALID) && (RCE->rce_rte->rte_flags != RTE_IF_VALID)) {
        RTE = RCE->rce_rte;
    } else {
        RTE = LookupRTE(RCE->rce_dest, RCE->rce_src, HOST_ROUTE_PRI, FALSE);
    }

    if (RTE == NULL) {
        // No route! Fail the call.
        CTEFreeLock(&RouteTableLock.Lock, TableHandle);
        return FALSE;
    }

    // Check if this RCE is in transition (usecnt did not permit
    // to switchover earlier)

    if ((RCE->rce_flags & RCE_DEADGW) && (RCE->rce_rte != RTE)) {

        RouteTableEntry *tmpRTE = NULL;


        // Scan through DefaultGWs checking
        // for a GW that is in the process of
        // taking over from the current one.


        if (RTE->rte_todg) {
            tmpRTE = GetDefaultGWs(&tmpRTE);

            while (tmpRTE) {
               if (tmpRTE == RTE->rte_todg) {
                   break;
               }
               tmpRTE = tmpRTE->rte_next;
            }

        }
        if (tmpRTE) {

            // Remove references to GW
            // in transition and the current one

            ASSERT(tmpRTE->rte_fromdg == RTE);
            tmpRTE->rte_fromdg = NULL;
            RTE->rte_todg = NULL;
        }

        // Count this event.
        Rcefailures++;
    }

    Status = TRUE;

    // Yep, we found one. Get the lock on the RCE, and make sure he's
    // not pointing at an RTE already. We also need to make sure that the usecnt
    // is 0, so that we can invalidate the RCE at the low level. If we set valid
    // to TRUE without doing this we may get into a weird situation where we
    // link the RCE onto an RTE but the lower layer information is wrong, so we
    // send to IP address X at mac address Y. So to be safe we don't set valid
    // to TRUE until both usecnt is 0 and valid is FALSE. We'll keep coming
    // through this routine on every send until that happens.

    CTEGetLock(&RCE->rce_lock, &RCEHandle);
    if (RCE->rce_usecnt == 0) {
        // Nobody is using him, so we can link him up.
        if (!(RCE->rce_flags & RCE_VALID)) {
            Interface *IF, *tmpIF;
            // He's not valid. Invalidate the lower layer info, just in
            // case. Make sure he's connected before we try to do this. If
            // he's not marked as connected, don't bother to try and invalidate
            // him as there is no interface.

            if (RCE->rce_flags & RCE_CONNECTED) {

                // NOTE(review): rce_rte is reinterpreted as an Interface
                // pointer here; presumably an invalidated, connected RCE
                // keeps its interface pointer in rce_rte - confirm.
                IF = (Interface *) RCE->rce_rte;

                // Invalidating this IF can fail in a PnP world. An invalid
                // RCE cannot be found on an RTE list to be invalidated if the
                // interface decides to take off!
                // So, check the sanity of the interface by looking for it on
                // the interface list.

                for (tmpIF = IFList; tmpIF != NULL; tmpIF = tmpIF->if_next) {
                    if (tmpIF == IF)
                        break;

                }
                if (tmpIF) {
                    (*(IF->if_invalidate)) (IF->if_lcontext, RCE);
                } else {
                    // The interface is gone; just wipe the RCE's link-layer
                    // context.
                    RtlZeroMemory(RCE->rce_context, RCE_CONTEXT_SIZE);
                }
                if (RCE->rce_flags & RCE_REFERENCED) {
                    // Only dereference the interface if it still exists.
                    if (tmpIF)
                        LockedDerefIF(IF);
                    RCE->rce_flags &= ~RCE_REFERENCED;
                }
            } else {
                ASSERT(!(RCE->rce_flags & RCE_REFERENCED));
            }

            // Link the RCE on the RTE, and set up the back pointer.
            RCE->rce_rte = RTE;
            RCE->rce_flags |= RCE_VALID;
            RCE->rce_next = RTE->rte_rcelist;
            RTE->rte_rcelist = RCE;
            RTE->rte_rces += RCE->rce_cnt;
            RCE->rce_flags &= ~RCE_DEADGW;

            // Make sure the RTE is connected. If not, try to connect him.
            if (!(RTE->rte_flags & RTE_IF_VALID)) {
                // Not connected. Try to connect him.
                RTE = FindValidIFForRTE(RTE, RCE->rce_dest, RCE->rce_src,
                                        Protocol, Buffer, Length, RCE->rce_src);
                if (RTE != NULL) {
                    // Got one, so mark as connected.
                    ASSERT(!(RCE->rce_flags & RCE_REFERENCED));
                    RCE->rce_flags |= (RCE_CONNECTED | RCE_REFERENCED);
                    LOCKED_REFERENCE_IF(RTE->rte_if);
                } else {

                    // Couldn't get a valid i/f. Mark the RCE as not connected,
                    // and set up to fail this call.
                    RCE->rce_flags &= ~RCE_CONNECTED;
                    Status = FALSE;
                }
            } else {
                // The RTE is connected, mark the RCE as connected.
                ASSERT(!(RCE->rce_flags & RCE_REFERENCED));
                RCE->rce_flags |= (RCE_CONNECTED | RCE_REFERENCED);
                LOCKED_REFERENCE_IF(RTE->rte_if);
            }
        } else {

            // The RCE is valid. See if it's connected.
            if (!(RCE->rce_flags & RCE_CONNECTED)) {

                // Not connected, try to get a valid i/f.
                if (!(RTE->rte_flags & RTE_IF_VALID)) {
                    RTE = FindValidIFForRTE(RTE, RCE->rce_dest, RCE->rce_src,
                                            Protocol, Buffer, Length, RCE->rce_src);
                    if (RTE != NULL) {
                        RCE->rce_flags |= RCE_CONNECTED;
                        ASSERT(!(RCE->rce_flags & RCE_REFERENCED));
                        ASSERT(RTE == RCE->rce_rte);
                        RCE->rce_flags |= RCE_REFERENCED;
                        LOCKED_REFERENCE_IF(RTE->rte_if);
                    } else {

                        // Couldn't connect, so fail.
                        Status = FALSE;
                    }
                } else {        // Already connected, just mark as valid.

                    RCE->rce_flags |= RCE_CONNECTED;
                    // Take an interface reference only if the RCE doesn't
                    // already hold one.
                    if (!(RCE->rce_flags & RCE_REFERENCED)) {
                        RCE->rce_flags |= RCE_REFERENCED;
                        LOCKED_REFERENCE_IF(RTE->rte_if);
                    }
                }
            }
        }
    }
    // Free the locks and we're done.
    CTEFreeLock(&RCE->rce_lock, RCEHandle);
    CTEFreeLock(&RouteTableLock.Lock, TableHandle);
    return Status;

}

//** IPGetPInfo - Get path information.
//
//  Called by an upper layer to get information about a path. We return the
//  MTU of the path and the maximum link speed to be expected on the path.
//
//  If the caller supplies an RCE whose flags are exactly RCE_ALL_VALID, the
//  RTE cached in that RCE is used; otherwise the route is looked up from
//  Dest and Src.
//
//  Locking: the route table lock is taken first and held until we are done
//  with the RTE; the RCE lock is nested inside it (the same order CloseRCE
//  uses). The RTE pointer read out of the RCE is therefore never used
//  outside the route table lock.
//
//  Input:  Dest            - Destination address.
//          Src             - Src address.
//          NewMTU          - Where to store path MTU (may be NULL).
//          MaxPathSpeed    - Where to store maximum path speed (may be NULL).
//          RCE             - RCE to be used to find the route (may be NULL).
//
//  Returns: IP_SUCCESS if a route was found, IP_DEST_HOST_UNREACHABLE if not.
//
IP_STATUS
IPGetPInfo(IPAddr Dest, IPAddr Src, uint * NewMTU, uint *MaxPathSpeed,
           RouteCacheEntry *RCE)
{
    CTELockHandle TableHandle;
    CTELockHandle RCEHandle;
    RouteTableEntry *RTE = NULL;
    IP_STATUS Status;

    // Take the table lock before peeking at the RCE so the RTE we pick up
    // from it is only ever dereferenced while the table lock is held.
    CTEGetLock(&RouteTableLock.Lock, &TableHandle);

    if (RCE != NULL) {
        CTEGetLock(&RCE->rce_lock, &RCEHandle);
        if (RCE->rce_flags == RCE_ALL_VALID) {
            RTE = RCE->rce_rte;
        }
        CTEFreeLock(&RCE->rce_lock, RCEHandle);
    }

    // No usable cached route - fall back to a table lookup.
    if (RTE == NULL) {
        RTE = LookupRTE(Dest, Src, HOST_ROUTE_PRI, FALSE);
    }

    if (RTE != NULL) {
        if (NewMTU != NULL) {
            // If the route is on a P2MP interface get the MTU from the link
            // associated with the route.
            if (RTE->rte_link != NULL) {
                *NewMTU = RTE->rte_link->link_mtu;
            } else {
                *NewMTU = RTE->rte_mtu;
            }
        }
        if (MaxPathSpeed != NULL) {
            *MaxPathSpeed = RTE->rte_if->if_speed;
        }
        Status = IP_SUCCESS;
    } else {
        Status = IP_DEST_HOST_UNREACHABLE;
    }

    CTEFreeLock(&RouteTableLock.Lock, TableHandle);
    return Status;

}

//** IPCheckRoute - Check that a route is valid.
//
//  Called by an upper layer when it believes a route might be invalid.
//  Nothing is done unless dead gateway detection (DeadGWDetect) is enabled.
//
//  We look up the route to Dest under the route table lock. The route is
//  only acted on if it was last validated more than MIN_RT_VALID ago
//  (rte_valid and Now are both CTESystemUpTime() / 1000) and if it goes
//  through a gateway (rte_addr is not IPADDR_LOCAL). Then:
//
//    - If the route was learned via ICMP (presumably a redirect) we mark it
//      invalid and look for another route. If there is none we mark it valid
//      again, invalidate its RCE chain and revalidate the default gateways.
//      If there is another route the ICMP route is simply left invalid.
//
//    - If the route is a default route (rte_mask == DEFAULT_MASK) and more
//      than one default gateway is configured, we move towards another
//      gateway. rte_todg on the failing gateway points at the candidate
//      replacement and rte_fromdg on the candidate points back, so that the
//      switch can be done gradually, one RCE at a time, before all the RCEs
//      are moved.
//
//    - Any other route is left alone - we don't mess with manually
//      configured routes.
//
//  Input:  Dest                    - Destination to be reached.
//          Src                     - Src we're sending from.
//          RCE                     - route-cache-entry to be updated.
//          OptInfo                 - options to use if recreating the RCE
//          CheckRouteFlag          - modifies this routine's behavior;
//                                    CHECK_RCE_ONLY suppresses the
//                                    "25% of RCEs have moved" trigger for
//                                    switching every RCE.
//
//  Returns: Nothing.
//
void
IPCheckRoute(IPAddr Dest, IPAddr Src, RouteCacheEntry * RCE, IPOptInfo *OptInfo,
             uint CheckRouteFlag)
{
    RouteTableEntry *RTE;
    RouteTableEntry *NewRTE;
    RouteTableEntry *TempRTE;   // NOTE(review): not referenced in this routine.
    CTELockHandle Handle;
    uint Now = CTESystemUpTime() / 1000L;

    if (DeadGWDetect) {
        uint UnicastIf;

        // We are doing dead G/W detection. Get the lock, and try and
        // find the route.

        // Decide whether to do a strong or weak host lookup.
        UnicastIf = GetIfConstraint(Dest, Src, OptInfo, FALSE);

        CTEGetLock(&RouteTableLock.Lock, &Handle);
        RTE = LookupRTE(Dest, Src, HOST_ROUTE_PRI, UnicastIf);
        if (RTE != NULL && ((Now - RTE->rte_valid) > MIN_RT_VALID)) {

            // Found a route, and it's older than the minimum valid time. If it
            // goes through a G/W, and is a route we learned via ICMP or is a
            // default route, do something with it.
            if (!IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL)) {
                // It is through a G/W.

                if (RTE->rte_proto == IRE_PROTO_ICMP) {

                    // Came from ICMP. Mark as invalid, and then make sure
                    // we have another route there. The lookup is repeated
                    // with the same arguments while this RTE has RTE_VALID
                    // clear.
                    RTE->rte_flags &= ~RTE_VALID;
                    NewRTE = LookupRTE(Dest, Src, HOST_ROUTE_PRI, UnicastIf);

                    if (NewRTE == NULL) {
                        // Can't get there any other way so leave this
                        // one alone.
                        RTE->rte_flags |= RTE_VALID;

                        // Invalidate the RCEs on this route and revalidate
                        // all the other gateways.
                        InvalidateRCEChain(RTE);
                        ValidateDefaultGWs(NULL_IP_ADDR);
                    }
                    // The discovered route under the
                    // NTE is not cleaned up.
                    // Since deleting the route itself does not serve any purpose and
                    // the route will time out eventually, let us leave this
                    // as invalid.

                } else {
                    if (RTE->rte_mask == DEFAULT_MASK) {

                        // This is a default gateway. If we have more than one
                        // configured move to the next one.

                        if (DefGWConfigured > 1) {
                            // Have more than one. Try the next one. First
                            // invalidate any RCEs on this G/W.

                            if (DefGWActive == 1) {
                                // No more active. Revalidate all of them,
                                // and try again.
                                ValidateDefaultGWs(NULL_IP_ADDR);
                                //      ASSERT(DefGWActive == DefGWConfigured);
                            } else {

                                // Make sure that we do not switch all the
                                // connections just because of a spurious
                                // dead gateway event.
                                // Switch only when a percentage of the
                                // connections have failed over to the other
                                // gateway.

                                // If we have already found the next default
                                // gateway, check if it is time to switch all
                                // the connections to it.

                                if (RTE->rte_todg) {

#if DBG
                                    // Checked builds only: verify that the
                                    // remembered replacement gateway is
                                    // still on the default gateway list.
                                    {
                                        RouteTableEntry *tmpRTE = NULL;
                                        tmpRTE = GetDefaultGWs(&tmpRTE);

                                        while (tmpRTE) {
                                            if (tmpRTE == RTE->rte_todg) {
                                                break;
                                            }
                                            tmpRTE = tmpRTE->rte_next;
                                        }
                                        if (tmpRTE == NULL) {
                                            DbgBreakPoint();
                                        }
                                    }
#endif

                                    // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"to todg %lx\n", RTE));

                                    // Switch everyone if either
                                    //  - this RCE is the only RCE left on
                                    //    the active gateway, or
                                    //  - the alternate gateway now has at
                                    //    least 25% as many RCEs as the active
                                    //    gateway and the caller has not
                                    //    requested a switch for this RCE only.
                                    // In that case invalidate the active
                                    // gateway; otherwise move just this RCE.
                                    if ((RTE->rte_rcelist == RCE &&
                                        RCE->rce_next == NULL) ||
                                        (RTE->rte_todg->rte_rces >=
                                        (RTE->rte_rces >> 2) &&
                                        !(CheckRouteFlag & CHECK_RCE_ONLY))) {

                                        //Switch every one.

                                        // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL," Switching every one %x to %x\n", RTE->rte_todg, RTE));
                                        --DefGWActive;
                                        RTE->rte_flags &= ~RTE_VALID;
                                        UpdateDeadGWState();

                                        // The transition is complete; break
                                        // the todg/fromdg links in both
                                        // directions.
                                        RTE->rte_todg->rte_fromdg = NULL;
                                        RTE->rte_todg = NULL;

                                        if (RTE->rte_fromdg) {
                                            RTE->rte_fromdg->rte_todg = NULL;
                                        }
                                        RTE->rte_fromdg = NULL;
                                        InvalidateRCEChain(RTE);
                                        //ASSERT(RTE->rte_rces == 0);

                                    } else {

                                        //Switch this particular connection to the new one.

                                        // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL," attaching RCE %x to newrte %x\n", RCE, RTE->rte_todg));
                                        AttachRCEToNewRTE(RTE->rte_todg, RCE, RTE);
                                    }

                                } else if (RTE->rte_fromdg) {

                                    // This gateway was itself the candidate
                                    // replacement for rte_fromdg, and now it
                                    // is failing too. Find if there are any
                                    // other gateways other than fromdg and
                                    // switch to that.
                                    // Note that if we have more than 3 default gateways
                                    // configured, this algorithm does not do a good job

                                    RouteTableEntry *OldRTE = RTE;

                                    // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"GW %x goofed. RTEfromdg %x\n",RTE,RTE->rte_fromdg));

                                    --DefGWActive;
                                    UpdateDeadGWState();
                                    // Turn on the dead gw flag on both this
                                    // RTE and its fromdg to tell FindRTE not
                                    // to consider them; the flag is cleared
                                    // again right after the lookup.

                                    RTE->rte_flags |= RTE_DEADGW;
                                    RTE->rte_fromdg->rte_flags |= RTE_DEADGW;

                                    RTE = FindRTE(Dest, Src, 0,
                                                  DEFAULT_ROUTE_PRI,
                                                  DEFAULT_ROUTE_PRI, UnicastIf);

                                    OldRTE->rte_flags &= ~RTE_DEADGW;
                                    OldRTE->rte_fromdg->rte_flags &= ~RTE_DEADGW;

                                    if (RTE == NULL) {
                                        // No more default gateways! This is bad.
                                        //ASSERT(FALSE);

                                        // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"No more def routes!\n"));

                                        // Drop all transition state and
                                        // start over with every gateway
                                        // revalidated.
                                        OldRTE->rte_fromdg->rte_todg = NULL;
                                        OldRTE->rte_fromdg->rte_fromdg = NULL;

                                        OldRTE->rte_fromdg = NULL;

                                        OldRTE->rte_todg = NULL;

                                        ValidateDefaultGWs(NULL_IP_ADDR);

                                        //ASSERT(DefGWActive == DefGWConfigured);

                                    } else {

                                        // we have a third gateway to try!

                                        //                   ASSERT(RTE->rte_mask == DEFAULT_MASK);

                                        //Treat OldRTE as dead!

                                        // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Trying next def route %x\n",RTE));

                                        OldRTE->rte_flags &= ~RTE_VALID;

                                        // The new gateway takes OldRTE's
                                        // place as the candidate replacement
                                        // for OldRTE's fromdg.
                                        RTE->rte_fromdg = OldRTE->rte_fromdg;
                                        RTE->rte_fromdg->rte_todg = RTE;

                                        if (OldRTE->rte_todg)
                                            OldRTE->rte_todg->rte_fromdg = NULL;

                                        OldRTE->rte_todg = NULL;
                                        OldRTE->rte_fromdg = NULL;

                                        //Attach all the RCEs to the new one

                                        AttachRCEToNewRTE(RTE, NULL, OldRTE);
                                        RTE->rte_valid = Now;

                                    }

                                } else {

                                    // No transition is in progress for this
                                    // gateway yet: find the next potential
                                    // default gateway.
                                    RouteTableEntry *OldRTE = RTE;

                                    // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Finding potential GW\n" ));

                                    // Hide the failing gateway from FindRTE
                                    // for the duration of the lookup only.
                                    OldRTE->rte_flags |= RTE_DEADGW;

                                    RTE = FindRTE(Dest, Src, 0,
                                                  DEFAULT_ROUTE_PRI,
                                                  DEFAULT_ROUTE_PRI, UnicastIf);

                                    OldRTE->rte_flags &= ~RTE_DEADGW;

                                    if (RTE == NULL) {
                                        // No more default gateways! This is bad.
                                        // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL," ---No more def routes!\n"));
                                        //                   ASSERT(FALSE);
                                        ValidateDefaultGWs(NULL_IP_ADDR);
                                        //ASSERT(DefGWActive == DefGWConfigured);
                                    } else {
                                        ASSERT(RTE->rte_mask == DEFAULT_MASK);

                                        //remember the new gw until we transition fully

                                        OldRTE->rte_todg = RTE;
                                        RTE->rte_fromdg = OldRTE;

                                        // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"FoundGW %x\n",RTE));

                                        //Attach this RCE to use the new RTE

                                        AttachRCEToNewRTE(RTE, RCE, OldRTE);

                                        RTE->rte_valid = Now;
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        CTEFreeLock(&RouteTableLock.Lock, Handle);
    }
}

//** FindRCE - Find an RCE on an RTE.
//
//  Walks the list of RCEs chained on an RTE and returns the first one whose
//  destination and source addresses both match. We assume the lock is held
//  on the RTE.
//
//  Entry:  RTE             - RTE whose RCE chain is searched.
//          Dest            - Destination address of RCE to find.
//          Src             - Source address of RCE to find. Must not be
//                              NULL_IP_ADDR (asserted).
//
//  Returns: Pointer to RCE found, or NULL.
//
RouteCacheEntry *
FindRCE(RouteTableEntry * RTE, IPAddr Dest, IPAddr Src)
{
    RouteCacheEntry *Candidate;

    ASSERT(!IP_ADDR_EQUAL(Src, NULL_IP_ADDR));

    Candidate = RTE->rte_rcelist;
    while (Candidate != NULL) {
        if (IP_ADDR_EQUAL(Candidate->rce_dest, Dest) &&
            IP_ADDR_EQUAL(Candidate->rce_src, Src)) {
            // Both halves of the key match - this is the one.
            return Candidate;
        }
        Candidate = Candidate->rce_next;
    }

    // Ran off the end of the chain without a match.
    return NULL;

}


//** OpenRCE - Open an RCE for a specific route.
//
//  Called by the upper layer to open an RCE. We classify the destination
//  address (the result is always stored through Type); if it is invalid the
//  call fails. Otherwise we validate the caller's source address if one was
//  given, look up the route, and either take another reference on an RCE
//  that already exists for this (destination, source) pair on the RTE or
//  allocate a new one and link it at the head of the RTE's RCE list.
//
//  As an added bonus, this routine will return the local address to use
//  to reach the destination. If the RCE pointer itself is NULL only that
//  address is computed and no RCE is opened.
//
//  The whole operation runs under the route table lock, which is released
//  at the single exit point at the bottom of the routine.
//
//  Entry:  Address         - Address for which we are to open an RCE. Replaced
//                              by OptInfo->ioi_addr when that is not
//                              NULL_IP_ADDR.
//          Src             - Source address we'll be using, or NULL_IP_ADDR
//                              to have one chosen.
//          RCE             - Pointer to where to return pointer to RCE.
//          Type            - Pointer to where to return destination type.
//          MSS             - Pointer to where to return MSS for route.
//          OptInfo         - Pointer to option information, such as TOS and
//                              any source routing info.
//
//  Returns: Source IP address to use. This will be NULL_IP_ADDR if the
//          specified destination is unreachable for any reason.
//
IPAddr
OpenRCE(IPAddr Address, IPAddr Src, RouteCacheEntry ** RCE, uchar * Type,
        ushort * MSS, IPOptInfo * OptInfo)
{
    RouteTableEntry *RTE;            // RTE to put the RCE on.
    RouteTableEntry *ScanRTE;        // Cursor for the unnumbered i/f scan.
    RouteCacheEntry *CachedRCE;      // RCE already present on the RTE, if any.
    RouteCacheEntry *FreshRCE;       // Newly allocated RCE.
    NetTableEntry *NTE;              // NTE matching the caller's source.
    NetTableEntry *SrcNTE;           // NTE chosen to supply a source address.
    NetTableEntry *RealNTE = NULL;   // Set for local destinations; not read
                                     // again in this routine.
    CTELockHandle TableLock;
    CTELockHandle RCEHandle;
    IPAddr Result = NULL_IP_ADDR;    // Remains NULL_IP_ADDR on every failure.
    uint ConstrainIF = 0;
    uchar LocalType;

    if (!IP_ADDR_EQUAL(OptInfo->ioi_addr, NULL_IP_ADDR)) {
        Address = OptInfo->ioi_addr;
    }

    CTEGetLock(&RouteTableLock.Lock, &TableLock);

    LocalType = GetAddrType(Address);
    *Type = LocalType;

    // Nothing more to do for an invalid destination.
    if (LocalType == DEST_INVALID) {
        goto Exit;
    }

    if (IP_ADDR_EQUAL(Src, NULL_IP_ADDR)) {

        ConstrainIF = GetIfConstraint(Address, Src, OptInfo, FALSE);

    } else {

        // He's specified a source address: walk the NTE hash chain for it
        // and make sure it belongs to a valid NTE.
        NTE = NewNetTableList[NET_TABLE_HASH(Src)];
        while (NTE != NULL) {
            if ((NTE->nte_flags & NTE_VALID) &&
                IP_ADDR_EQUAL(Src, NTE->nte_addr)) {
                break;
            }
            NTE = NTE->nte_next;
        }

        if (NTE == NULL) {
            // Didn't find a match.
            goto Exit;
        }

        // Decide whether to do a strong or weak host lookup.
        // This is skipped for a unidirectional adapter, on which sends are
        // not permitted. If this OpenRCE is called before the specific
        // mcast address (ioi_mcastif) is set, GetIfConstraint for mcast
        // would fail. For W9x backward compatibility we let OpenRCE succeed
        // anyway in the unidirectional case. The side effect is that a send
        // attempted on this endpoint with the cached RCE will go out on a
        // random interface.
        if (!(NTE->nte_if->if_flags & IF_FLAGS_UNI)) {
            ConstrainIF = GetIfConstraint(Address, Src, OptInfo, FALSE);
        }
    }

    // Find the route for this guy. A loopback source is routed to itself
    // and is expected to resolve to the loopback interface.
    if (IP_LOOPBACK_ADDR(Src)) {

        RTE = LookupRTE(Src, Src, HOST_ROUTE_PRI, ConstrainIF);

        if (RTE == NULL) {
            KdPrint(("No Loopback rte!\n"));
            ASSERT(0);
        } else {
            ASSERT(RTE->rte_if == &LoopInterface);
        }

    } else {
        RTE = LookupRTE(Address, Src, HOST_ROUTE_PRI, ConstrainIF);
    }

    if (RTE == (RouteTableEntry *) NULL) {
        // No route.
        goto Exit;
    }

    //
    // Make sure interface is not shutting down. Should we also check for
    // IF_FLAGS_DELETING?
    //
    if (IS_IF_INVALID(RTE->rte_if) && RTE->rte_if->if_ntecount) {
        goto Exit;
    }

    if (OptInfo->ioi_uni) {

        // LookupRTE returns the first route in the chain of unnumbered
        // ifs. If this is not the one desired, scan further.
        ScanRTE = RTE;
        while (ScanRTE && (ScanRTE->rte_if->if_index != OptInfo->ioi_uni)) {
            ScanRTE = ScanRTE->rte_next;
        }

        if (!ScanRTE) {
            KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"OpenRCE:No matching unnumbered interface %d\n", OptInfo->ioi_uni));
            goto Exit;
        }

        RTE = ScanRTE;
    }

    // We found one. Return the route MTU; if the route is on a P2MP
    // interface the MTU comes from the link associated with the route.
    if (RTE->rte_link) {
        *MSS = (ushort) RTE->rte_link->link_mtu;
    } else {
        *MSS = (ushort) RTE->rte_mtu;
    }

    if (IP_LOOPBACK_ADDR(Src) && (RTE->rte_if != &LoopInterface)) {
        // The upper layer is sending from a loopback address, but the
        // destination isn't reachable through the loopback interface.
        // Fail the request.
        goto Exit;
    }

    // Tell the upper layer when the destination is off net.
    if (!IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL)) {
        *Type |= DEST_OFFNET_BIT;
    }

    //
    // If no source address was specified, then use the best address
    // for the interface. This will generally prevent dynamic NTE's from
    // being chosen as the source for wildcard binds.
    //
    if (IP_ADDR_EQUAL(Src, NULL_IP_ADDR)) {

        if (LocalType == DEST_LOCAL) {

            Src = Address;
            RealNTE = LoopNTE;

        } else if ((RTE->rte_if->if_flags & IF_FLAGS_NOIPADDR) &&
                   (IP_ADDR_EQUAL(RTE->rte_if->if_nte->nte_addr, NULL_IP_ADDR))) {

            // The interface has no address of its own; fall back to
            // g_ValidAddr, and fail if that is not set either.
            Src = g_ValidAddr;
            if (IP_ADDR_EQUAL(Src, NULL_IP_ADDR)) {
                goto Exit;
            }

        } else {

            SrcNTE = BestNTEForIF(ADDR_FROM_RTE(RTE, Address), RTE->rte_if);
            if (SrcNTE == NULL) {
                // Can't find an address! Fail the request.
                goto Exit;
            }
            Src = SrcNTE->nte_addr;
        }
    }

    if (RCE == NULL) {
        // Caller only wanted the source address.
        KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Openrce with null RCE!! %x\n",Src));
        Result = Src;
        goto Exit;
    }

    // Now, see if an RCE already exists for this.
    CachedRCE = FindRCE(RTE, Address, Src);

    if (CachedRCE != NULL) {

        // We have an existing RCE. Bump its reference count and the RTE's
        // RCE count, and hand it back. A non-zero rce_newmtu on the RCE
        // overrides the MSS taken from the route above.
        CTEGetLock(&CachedRCE->rce_lock, &RCEHandle);

        CachedRCE->rce_cnt++;
        *RCE = CachedRCE;

        if (CachedRCE->rce_newmtu) {
            *MSS = (USHORT) CachedRCE->rce_newmtu;
        }
        CachedRCE->rce_rte->rte_rces++;

        CTEFreeLock(&CachedRCE->rce_lock, RCEHandle);

        Result = Src;
        goto Exit;
    }

    // Don't have an existing RCE. See if we can get a new one, and fill
    // it in. Note the caller's pointer is written even when the allocation
    // fails.
    FreshRCE = CTEAllocMemNBoot(sizeof(RouteCacheEntry), 'AiCT');
    *RCE = FreshRCE;

    if (FreshRCE == NULL) {
        // alloc failed
        goto Exit;
    }

    RtlZeroMemory(FreshRCE, sizeof(RouteCacheEntry));
    CTEInitLock(&FreshRCE->rce_lock);

    FreshRCE->rce_dest = Address;
    FreshRCE->rce_src = Src;
    FreshRCE->rce_dtype = LocalType;
    FreshRCE->rce_rte = RTE;
    FreshRCE->rce_cnt = 1;
    FreshRCE->rce_flags = RCE_VALID;

    if (RTE->rte_flags & RTE_IF_VALID) {

        // The interface is usable: mark the RCE connected and take a
        // reference on the interface on its behalf.
        FreshRCE->rce_flags |= (RCE_CONNECTED | RCE_REFERENCED);
        LOCKED_REFERENCE_IF(RTE->rte_if);

        // We register the chksum capability of the interface associated
        // with this RCE, because interface definitions are transparent to
        // TCP or UDP. No offload flags are advertised when IPSecStatus is
        // set.
        if (IPSecStatus) {
            FreshRCE->rce_OffloadFlags = 0;
        } else {
            FreshRCE->rce_OffloadFlags = RTE->rte_if->if_OffloadFlags;
        }

        FreshRCE->rce_TcpLargeSend.MaxOffLoadSize = RTE->rte_if->if_MaxOffLoadSize;
        FreshRCE->rce_TcpLargeSend.MinSegmentCount = RTE->rte_if->if_MaxSegments;

        // Per-interface TCP tuning values, copied for the transport.
        FreshRCE->rce_TcpWindowSize = RTE->rte_if->if_TcpWindowSize;
        FreshRCE->rce_TcpInitialRTT = RTE->rte_if->if_TcpInitialRTT;
        FreshRCE->rce_TcpDelAckTicks = RTE->rte_if->if_TcpDelAckTicks;
        FreshRCE->rce_TcpAckFrequency = RTE->rte_if->if_TcpAckFrequency;
        FreshRCE->rce_mediaspeed = RTE->rte_if->if_speed;
    }

    // Link the new RCE at the head of the RTE's list.
    FreshRCE->rce_next = RTE->rte_rcelist;
    RTE->rte_rcelist = FreshRCE;
    RTE->rte_rces++;

    Result = Src;

Exit:
    CTEFreeLock(&RouteTableLock.Lock, TableLock);
    return Result;
}

void
FreeRCEToList(RouteCacheEntry * RCE)
/*++

Routine Description:

    Pushes an RCE onto the front of the global RCEFreeList. CloseRCE uses
    this instead of freeing the memory when the last reference has been
    closed but rce_usecnt on the RCE is still non-zero.

    Called with the route table lock held.

Arguments:

    RCE - RCE to place on the free list.

Return Value:

    None.

--*/
{
    // The entry becomes the new head; the old head hangs off its rce_next.
    RCE->rce_next = RCEFreeList;
    RCEFreeList = RCE;
}

//* CloseRCE - Close an RCE.
//
//  Called by the upper layer when it wants to close the RCE. One reference
//  is dropped. When the last reference goes away we unlink the RCE from its
//  RTE (if it is still linked on one), ask the interface to invalidate any
//  link-layer state it holds for the RCE, and then either free the RCE or,
//  if rce_usecnt is still non-zero, park it on the RCE free list.
//
//  An RCE that is no longer valid, or whose link has been deleted, keeps an
//  Interface pointer in rce_rte instead of an RTE pointer. In that case the
//  interface is only used if it can still be found on IFList.
//
//  Takes the route table lock and then the RCE lock.
//
//  Entry:  RCE     - Pointer to the RCE to be closed (NULL is ignored).
//
//  Exit: Nothing.
//
void
CloseRCE(RouteCacheEntry * RCE)
{
    RouteTableEntry *OwnerRTE;      // Route on which RCE is linked.
    RouteCacheEntry *Pred;          // Predecessor of RCE on the RTE's list.
    CTELockHandle TableLock;        // Lock handles used.
    CTELockHandle RCEHandle;
    Interface *IF;                  // Interface recorded for this RCE.
    Interface *LiveIF;              // Non-NULL if IF may be used.
    uint StillLinked;               // RCE is valid and its link not deleted.
    uint ParkOnFreeList = 0;

    if (RCE == NULL) {
        return;
    }

    CTEGetLock(&RouteTableLock.Lock, &TableLock);
    CTEGetLock(&RCE->rce_lock, &RCEHandle);

    StillLinked = (RCE->rce_flags & RCE_VALID) &&
                  !(RCE->rce_flags & RCE_LINK_DELETED);

    // Every close of a linked RCE is reflected in the RTE's RCE count.
    if (StillLinked) {
        RCE->rce_rte->rte_rces--;
    }

    RCE->rce_cnt--;
    if (RCE->rce_cnt != 0) {
        // Other references remain; nothing more to do.
        CTEFreeLock(&RCE->rce_lock, RCEHandle);
        CTEFreeLock(&RouteTableLock.Lock, TableLock);
        return;
    }

    // ASSERT(RCE->rce_usecnt == 0);
    ASSERT(*(int *)&(RCE->rce_usecnt) >= 0);

    if (StillLinked) {

        // The RCE is valid, so we have a valid RTE in the pointer field.
        // Walk down the RTE's rcelist until we find the entry in front of
        // this guy. The walk starts from a pseudo-entry overlaid on the
        // list head so that the head needs no special case.
        OwnerRTE = RCE->rce_rte;
        IF = OwnerRTE->rte_if;
        LiveIF = IF;

        for (Pred = STRUCT_OF(RouteCacheEntry, &OwnerRTE->rte_rcelist,
                              rce_next);
             Pred != NULL && Pred->rce_next != RCE;
             Pred = Pred->rce_next) {
            // Empty body - just advancing.
        }

        ASSERT(Pred != NULL);

        if (Pred != NULL) {
            Pred->rce_next = RCE->rce_next;
        }

    } else {

        // rce_rte holds an Interface pointer here. Make sure the interface
        // it points to is still there before touching it.
        IF = (Interface *) RCE->rce_rte;

        for (LiveIF = IFList;
             LiveIF != NULL && LiveIF != IF;
             LiveIF = LiveIF->if_next) {
            // Empty body - just searching.
        }
    }

    if (LiveIF == NULL) {
        // Interface not found.
        // NOTE(review): the RCE is neither freed nor put on the free list
        // on this path - confirm it is reclaimed elsewhere.
        CTEFreeLock(&RCE->rce_lock, RCEHandle);
        CTEFreeLock(&RouteTableLock.Lock, TableLock);
        return;
    }

    // Keep track of closes of RCEs that were not connected.
    if (!(RCE->rce_flags & RCE_CONNECTED)) {
        UnConnected++;
        UnConnectedRCE = RCE;
    }

    (*(IF->if_invalidate)) (IF->if_lcontext, RCE);

    if (RCE->rce_usecnt != 0) {
        // Still in use: free to the free list, and check in timer if the
        // usecnt has fallen to 0, if yes free it.
        ParkOnFreeList = 1;
    } else if (RCE->rce_flags & RCE_REFERENCED) {
        // Give back the interface reference held on behalf of this RCE.
        LockedDerefIF(IF);
    }

    CTEFreeLock(&RCE->rce_lock, RCEHandle);

    if (ParkOnFreeList) {
        // Remember the interface in the RTE field while the RCE is parked.
        RCE->rce_rte = (RouteTableEntry *) IF;
        FreeRCEToList(RCE);
    } else {
        CTEFreeMem(RCE);
    }

    CTEFreeLock(&RouteTableLock.Lock, TableLock);
}

//* LockedAddRoute - Add a route to the routing table.
//
//  Called by AddRoute to add a route to the routing table. We assume the
//  route table lock is already held. Routes are identified by a
//  (Destination, Mask, FirstHop, Interface) tuple; the interface is only
//  part of the match when no Context is supplied.
//
//  The insertion itself is delegated to InsRoute, which hands back the RTE
//  (flagged RTE_NEW if it was just created) together with the best route for
//  the destination before and after the insertion. On top of that we:
//
//    - invalidate the RCE chain of the previous best route if it has been
//      replaced, and revalidate the default gateways if that route was a
//      default route;
//    - for an existing RTE, invalidate its own RCE chain when the metric
//      is not improving, and take an extra reference if SetWithRefcnt;
//    - for P2MP interfaces, chain the RTE onto the matching link;
//    - refresh the RTE's fields and the default-gateway counters.
//
//  N.B. For interfaces that disallow multicast or unicast routes this
//  routine returns IP_SUCCESS *without* adding anything; in that case RNO
//  is left untouched.
//
//  Entry:  Destination - Destination address for which route is being added.
//          Mask        - Mask for destination.
//          FirstHop    - First hop for address. Could be IPADDR_LOCAL.
//          OutIF       - Pointer to outgoing I/F.
//          MTU         - Maximum MTU for this route (raised to MIN_VALID_MTU
//                        if smaller).
//          Metric      - Metric for this route; 0 selects the interface
//                        metric.
//          Proto       - Protocol type to store in route.
//          AType       - Administrative type of route.
//          Context     - context to be associated with the route
//          SetWithRefcnt - indicates the route should be referenced
//                        on the creator's behalf.
//          RNO         - optionally supplies a route-notification structure
//                        to be filled on output with details for the new route
//
//  Returns: Status of attempt to add route.
//
IP_STATUS
LockedAddRoute(IPAddr Destination, IPMask Mask, IPAddr FirstHop,
               Interface * OutIF, uint MTU, uint Metric, uint Proto, uint AType,
               ROUTE_CONTEXT Context, BOOLEAN SetWithRefcnt,
               IPRouteNotifyOutput* RNO)
{
    uint            RouteType;  // SNMP route type.
    RouteTableEntry *NewRTE;    // RTE inserted or matched by InsRoute.
    uint            OldMetric;  // Previous metric in use.
    uint            Now = CTESystemUpTime() / 1000L; // System up time,
                                // in seconds.
    ushort          OldFlags;   // rte_flags as handed back by InsRoute.
    ULONG           status;
    ULONG           matchFlags;
    RouteTableEntry *pOldBestRTE;
    RouteTableEntry *pNewBestRTE;

    LinkEntry *Link;

    IPAddr AllSNBCast;
    IPMask TmpMask;

    // OutIF is ref'd so it can't go away

    Link = OutIF->if_link;


    // If Metric is 0, set the metric to interface metric

    if (Metric == 0) {
        Metric = OutIF->if_metric;
    }


    // Do the following only if the interface is not a dummy interface

    if (OutIF != (Interface *) & DummyInterface) {
        // Silently ignore a multicast route on an interface that does not
        // allow multicast.

        if (IP_ADDR_EQUAL(Destination, MCAST_DEST) &&
            (OutIF->if_iftype & DONT_ALLOW_MCAST))
            return IP_SUCCESS;

        if (OutIF->if_iftype & DONT_ALLOW_UCAST) {

            // Check whether we are adding a ucast route
            //
            // NOTE(review): if_nte is dereferenced here without a NULL
            // check, although later code in this routine tests it first.

            TmpMask = IPNetMask(OutIF->if_nte->nte_addr);
            AllSNBCast =
                (OutIF->if_nte->nte_addr & TmpMask) |
                (OutIF->if_bcast & ~TmpMask);
            if (!(IP_ADDR_EQUAL(Destination, OutIF->if_bcast) ||
                  IP_ADDR_EQUAL(Destination, AllSNBCast) ||
                  IP_ADDR_EQUAL(Destination, MCAST_DEST))) {
                // this is not a bcast/mcast route: this is a ucast route
                return IP_SUCCESS;
            }
        }
    }

    // First do some consistency checks. Make sure that the Mask and
    // Destination agree.
    if (!IP_ADDR_EQUAL(Destination & Mask, Destination))
        return IP_BAD_DESTINATION;

    if (AType != ATYPE_PERM && AType != ATYPE_OVERRIDE && AType != ATYPE_TEMP)
        return IP_BAD_REQ;

    // If the interface is marked as going away, fail this.
    if (OutIF->if_flags & IF_FLAGS_DELETING) {
        return IP_BAD_REQ;
    }

    RouteType = IP_ADDR_EQUAL(FirstHop, IPADDR_LOCAL) ? IRE_TYPE_DIRECT :
        IRE_TYPE_INDIRECT;

    // If this is a route that is being added on an interface that has no
    // IP address, mark this as IRE_TYPE_DIRECT. This is true only for
    // P2P or P2MP interface, where route is plumbed and then address
    // is added due to a perf reason.


    if (((OutIF->if_flags & IF_FLAGS_P2P) ||
         (OutIF->if_flags & IF_FLAGS_P2MP)) &&
        OutIF->if_nte && (OutIF->if_nte->nte_flags & NTE_VALID) &&
        (IP_ADDR_EQUAL(OutIF->if_nte->nte_addr,NULL_IP_ADDR))) {
            RouteType = IRE_TYPE_DIRECT;
    }

    MTU = MAX(MTU, MIN_VALID_MTU);

    // If the outgoing interface has NTEs attached but none are valid, fail
    // this request unless it's a request to add the broadcast route.
    if (OutIF != (Interface *) & DummyInterface) {
        if (OutIF->if_ntecount == 0 && OutIF->if_nte != NULL &&
            !IP_ADDR_EQUAL(Destination, OutIF->if_bcast) &&
            !(OutIF->if_flags & IF_FLAGS_NOIPADDR)) {
            // This interface has NTEs attached, but none are valid. Fail the
            // request.
            return IP_BAD_REQ;
        }
    }

    // On a P2MP interface the route must correspond to one of the
    // interface's links: either the link's next hop is the first hop, or
    // the route is local and the link's next hop is the destination.
    if (OutIF->if_flags & IF_FLAGS_P2MP) {

        while (Link) {
            if ((Link->link_NextHop == FirstHop) ||
                ((Link->link_NextHop == Destination) &&
                 (FirstHop == IPADDR_LOCAL))) {
                break;
            }
            Link = Link->link_next;
        }

        if (!Link)
            return IP_GENERAL_FAILURE;
    }

    DEBUGMSG(DBG_INFO && DBG_IP && DBG_ROUTE,
         (DTEXT("LockedAddRoute:  D = %08x, M = %08x, NH = %08x, IF = %08x\n")
          DTEXT("\t\tMTU = %x, Met = %08x, Prot = %08x, AT = %08x, C = %08x\n"),
          Destination, Mask, FirstHop, OutIF, MTU, Metric, Proto, AType,
          Context));

    // Insert the route in the proper place depending on the dest, metric
    // Match next-hop (and interface if not a demand-dial route)
    matchFlags = MATCH_NHOP;

    if (!Context) {
        matchFlags |= MATCH_INTF;
    }
    status = InsRoute(Destination, Mask, FirstHop, OutIF, Metric,
                      matchFlags, &NewRTE, &pOldBestRTE, &pNewBestRTE);

    if (status != IP_SUCCESS) {
        return status;
    }
    // Has a best route been replaced
    if ((pOldBestRTE) && (pOldBestRTE != pNewBestRTE)) {
        InvalidateRCEChain(pOldBestRTE);

        // If the replaced route is a default gateway,
        // we may need to switch connections to the new entry.
        // To do so, we retrieve the current default gateway,
        // invalidate all its RCEs, and revalidate all gateways
        // to restart the dead-gateway detection procedure.

        if (pOldBestRTE->rte_mask == DEFAULT_MASK) {
            ValidateDefaultGWs(NULL_IP_ADDR);
        }
    }

    // Remember the flags before we modify them; RTE_NEW and RTE_VALID are
    // consulted again below.
    OldFlags = NewRTE->rte_flags;

    if (!(NewRTE->rte_flags & RTE_NEW)) {

        OldMetric = NewRTE->rte_metric;

        // The existing route is not getting any better, so its cached
        // entries are invalidated.
        if (Metric >= OldMetric && (OldFlags & RTE_VALID)) {
            InvalidateRCEChain(NewRTE);
        }
        if (SetWithRefcnt) {
            ASSERT(NewRTE->rte_refcnt > 0);
            NewRTE->rte_refcnt++;
        }
    } else {
        // this is a new RTE
        NewRTE->rte_refcnt = 1;
    }

    // If this is P2MP, chain this RTE on link
    if (Link && (NewRTE->rte_link == NULL)) {

        //
        // This RTE is not on the link
        // Insert the route in the linkrte chain
        //

        NewRTE->rte_nextlinkrte = Link->link_rte;
        Link->link_rte = NewRTE;
        NewRTE->rte_link = Link;
    }


    // Update fields in the new/old route
    NewRTE->rte_addr = FirstHop;
    NewRTE->rte_mtu = MTU;
    NewRTE->rte_metric = Metric;
    NewRTE->rte_type = (ushort) RouteType;
    NewRTE->rte_if = OutIF;

    NewRTE->rte_flags &= ~RTE_NEW;
    NewRTE->rte_flags |= RTE_VALID;
    NewRTE->rte_flags &= ~RTE_INCREASE;
    if (OutIF != (Interface *) & DummyInterface) {
        NewRTE->rte_flags |= RTE_IF_VALID;
        SortRoutesInDestByRTE(NewRTE);
    } else
        NewRTE->rte_flags &= ~RTE_IF_VALID;

    NewRTE->rte_admintype = AType;
    NewRTE->rte_proto = Proto;
    NewRTE->rte_valid = Now;
    NewRTE->rte_mtuchange = Now;
    NewRTE->rte_context = Context;


    // Check if this is a new route or an old one
    if (OldFlags & RTE_NEW) {
        // Reset few fields in new route

        NewRTE->rte_todg = NULL;
        NewRTE->rte_fromdg = NULL;
        NewRTE->rte_rces = 0;

        // Clear the whole ARP context. The length is RCE_CONTEXT_SIZE
        // itself; sizeof(RCE_CONTEXT_SIZE) would merely be the size of the
        // constant's type and leave part of the context uninitialized.
        // (Assumes rte_arpcontext is RCE_CONTEXT_SIZE bytes long, as the
        // original call intended.)
        RtlZeroMemory(NewRTE->rte_arpcontext, RCE_CONTEXT_SIZE);

        IPSInfo.ipsi_numroutes++;

        if (NewRTE->rte_mask == DEFAULT_MASK) {
            // A default route.
            DefGWConfigured++;
            DefGWActive++;
            UpdateDeadGWState();
        }
    } else {

        // If the RTE is for a default gateway and the old flags indicate
        // he wasn't valid then we're essentially creating a new active
        // default gateway. So bump up the active default gateway count.
        if (NewRTE->rte_mask == DEFAULT_MASK) {
            if (!(OldFlags & RTE_VALID)) {
                DefGWActive++;
                UpdateDeadGWState();

                // Reset few fields in this route

                NewRTE->rte_todg = NULL;
                NewRTE->rte_fromdg = NULL;
                NewRTE->rte_rces = 0;
            }
        }
    }

    // If a route-notification structure was supplied, fill it in.
    // irno_flags is only written for a brand new route; for an update it
    // keeps whatever value the caller initialized it with.

    if (RNO) {
        RNO->irno_dest = NewRTE->rte_dest;
        RNO->irno_mask = NewRTE->rte_mask;
        RNO->irno_nexthop = GetNextHopForRTE(NewRTE);
        RNO->irno_proto = NewRTE->rte_proto;
        RNO->irno_ifindex = OutIF->if_index;
        RNO->irno_metric = NewRTE->rte_metric;
        if (OldFlags & RTE_NEW) {
            RNO->irno_flags = IRNO_FLAG_ADD;
        }
    }

    return IP_SUCCESS;
}

//* AddRoute - Add a route to the routing table.
//
//  Locking shell around LockedAddRoute. The route table lock is taken for
//  the duration of the add and dropped again before any route-change
//  notification is issued. The split exists because some routines need to
//  examine and add routes atomically and therefore call LockedAddRoute
//  directly with the lock already held.
//
//  Entry:  Destination - Destination address for which route is being
//                            added.
//          Mask        - Mask for destination.
//          FirstHop    - First hop for address. Could be IPADDR_LOCAL.
//          OutIF       - Pointer to outgoing I/F.
//          MTU         - Maximum MTU for this route.
//          Metric      - Metric for this route.
//          Proto       - Protocol type to store in route.
//          AType       - Administrative type of route.
//          Context     - Context for this route.
//          Flags       - RT_EXCLUDE_LOCAL rejects IRE_PROTO_LOCAL routes,
//                        RT_REFCOUNT references the route on the creator's
//                        behalf, RT_NO_NOTIFY skips the extended
//                        notification queue.
//
//  Returns: Status of attempt to add route.
//
IP_STATUS
AddRoute(IPAddr Destination, IPMask Mask, IPAddr FirstHop,
         Interface * OutIF, uint MTU, uint Metric, uint Proto, uint AType,
         ROUTE_CONTEXT Context, uint Flags)
{
    CTELockHandle       LockHandle;
    IP_STATUS           Result;
    IPRouteNotifyOutput RNO = {0};
    BOOLEAN             WithRefcnt;
    BOOLEAN             NotifyExQueue;

    // Local routes may not be added when the caller excludes them.
    if (Proto == IRE_PROTO_LOCAL && (Flags & RT_EXCLUDE_LOCAL)) {
        return IP_BAD_REQ;
    }

    WithRefcnt = (BOOLEAN)((Flags & RT_REFCOUNT) ? TRUE : FALSE);
    NotifyExQueue = (BOOLEAN)((Flags & RT_NO_NOTIFY) ? FALSE : TRUE);

    CTEGetLock(&RouteTableLock.Lock, &LockHandle);
    Result = LockedAddRoute(Destination, Mask, FirstHop, OutIF, MTU, Metric,
                            Proto, AType, Context, WithRefcnt, &RNO);
    CTEFreeLock(&RouteTableLock.Lock, LockHandle);

    if (Result != IP_SUCCESS) {
        return Result;
    }

#if FFP_SUPPORT
    FFPFlushRequired = TRUE;
#endif

    // LockedAddRoute can report IP_SUCCESS without having added anything.
    // A real addition always fills in a non-zero interface index, so a
    // zero index (RNO starts out zeroed) means there is nothing to
    // announce.

    if (RNO.irno_ifindex) {
        if (NotifyExQueue) {
            RtChangeNotifyEx(&RNO);
        }
        RtChangeNotify(&RNO);
    }

    return Result;
}

//* RtChangeNotify - Supply a route-change for notification to any clients
//
//  Thin wrapper around the common change-notification handler. The route
//  notification is handed over as a generic IPNotifyOutput, and the
//  route-change notification queue (guarded by the route table lock) is
//  named as the source of pending client requests.
//
//  Entry:  RNO         - describes the route-notification event
//
//  Returns: nothing.
//
void
RtChangeNotify(IPRouteNotifyOutput *RNO)
{
    IPNotifyOutput *Output = (IPNotifyOutput *)RNO;

    ChangeNotify(Output, &RtChangeNotifyQueue, &RouteTableLock.Lock);
}

//* RtChangeNotifyEx - Supply a route-change for notification to any clients
//
//  Thin wrapper around the common change-notification handler. The route
//  notification is handed over as a generic IPNotifyOutput, and the
//  extended route-change notification queue (guarded by the route table
//  lock) is named as the source of pending client requests.
//
//  Entry:  RNO         - describes the route-notification event
//
//  Returns: nothing.
//
void
RtChangeNotifyEx(IPRouteNotifyOutput *RNO)
{
    IPNotifyOutput *Output = (IPNotifyOutput *)RNO;

    ChangeNotify(Output, &RtChangeNotifyQueueEx, &RouteTableLock.Lock);
}

//* ChangeNotifyAsync -  Supply a change for notification
//
//  Handler for a deferred change-notification. The context is the
//  ChangeNotifyEvent that ChangeNotify allocated when it deferred the call;
//  it carries a copy of the change description together with the queue and
//  lock to use. The saved change is replayed through ChangeNotify and the
//  context is then released.
//
//  Entry:  Event       - CTEEvent for the deferred call (not used here)
//          Context     - context containing information about the change
//
//  Returns: nothing.
//
void
ChangeNotifyAsync(CTEEvent *Event, PVOID Context)
{
    ChangeNotifyEvent *Deferred = (ChangeNotifyEvent *)Context;

    ChangeNotify(&Deferred->cne_info, Deferred->cne_queue,
                 Deferred->cne_lock);

    // The event block was allocated solely for this deferral.
    CTEFreeMem(Deferred);
}

//* ChangeNotifyClientInQueue - See if a client is in a notification queue
//
//  Utility routine used by ChangeNotify. Walks a queue of IRPs (linked
//  through Tail.Overlay.ListEntry) and reports whether any of them was
//  issued on the given file object, which is how a client is identified.
//
//  Entry:  FileObject      - identifies the client
//          NotifyQueue     - contains a list of requests to be searched
//
//  Returns: TRUE if the client is present, FALSE otherwise.
//
BOOLEAN
ChangeNotifyClientInQueue(PFILE_OBJECT FileObject, PLIST_ENTRY NotifyQueue)
{
    PLIST_ENTRY Cursor = NotifyQueue->Flink;

    // The list is circular; coming back to the head ends the walk.
    while (Cursor != NotifyQueue) {
        PIRP PendingIrp =
            CONTAINING_RECORD(Cursor, IRP, Tail.Overlay.ListEntry);

        if (IoGetCurrentIrpStackLocation(PendingIrp)->FileObject ==
                FileObject) {
            return TRUE;
        }

        Cursor = Cursor->Flink;
    }

    return FALSE;
}

//* ChangeNotify -  Notify about a route change
//
//  This routine is the common handler for change notifications.
//  It takes a description of a change, and searches the specified queue
//  for pending client-requests (IRPs) that correspond to the changed item.
//  Matching requests are moved to a local list while 'Lock' is held and
//  are completed with STATUS_SUCCESS after 'Lock' has been released.
//
//  When invoked at DISPATCH_LEVEL or above, nothing is done inline: the
//  change is copied and the work is rescheduled via ChangeNotifyAsync.
//  If the memory for that copy cannot be allocated the notification is
//  dropped.
//
//  Entry:  NotifyOutput    - contains information about the change event
//          NotifyQueue     - supplies the queue in which to search for clients
//          Lock            - supplies the lock protecting 'NotifyQueue'.
//
//  Returns: nothing.
//
void
ChangeNotify(IPNotifyOutput* NotifyOutput, PLIST_ENTRY NotifyQueue, PVOID Lock)
{
    IPAddr              Add = NotifyOutput->ino_addr;
    IPMask              Mask = NotifyOutput->ino_mask;
    PIRP                Irp;
    CTELockHandle       LockHandle;
    PLIST_ENTRY         ListEntry;
    uint                i;
    PIPNotifyData       NotifyData;
    LIST_ENTRY          LocalNotifyQueue;   // IRPs selected for completion.
    PIO_STACK_LOCATION  IrpSp;
    BOOLEAN             synchronizeWithCancelRoutine = FALSE;

    // See if we're being invoked at dispatch IRQL and, if so,
    // defer the notification to a worker thread.
    //
    // N.B. We do this *without* touching 'Lock' which might already
    // be held by the caller.

    if (KeGetCurrentIrql() >= DISPATCH_LEVEL) {
        ChangeNotifyEvent *CNE;
        CNE = CTEAllocMemNBoot(sizeof(ChangeNotifyEvent), 'xiCT');
        if (CNE) {
            // The caller's structure may not outlive this call, so the
            // deferred handler works from a private copy.
            CNE->cne_info = *NotifyOutput;
            CNE->cne_queue = NotifyQueue;
            CNE->cne_lock = Lock;
            CTEInitEvent(&CNE->cne_event, ChangeNotifyAsync);
            CTEScheduleDelayedEvent(&CNE->cne_event, CNE);
        }
        // On allocation failure the notification is silently lost.
        return;
    }

    // Examine the list of pending change-notification requests
    // to see if any of them match the parameters of the current event.

    InitializeListHead(&LocalNotifyQueue);
    CTEGetLock(Lock, &LockHandle);

    // N.B. The loop has no increment clause: every path through the body
    // advances ListEntry itself, because a matching entry is unlinked.

    for (ListEntry = NotifyQueue->Flink; ListEntry != NotifyQueue; ) {

        Irp = CONTAINING_RECORD(ListEntry, IRP, Tail.Overlay.ListEntry);
        IrpSp = IoGetCurrentIrpStackLocation(Irp);

        // Determine whether an input buffer was supplied and, if so,
        // pick it up to see if the event matches the notification request.

        if (IrpSp->Parameters.DeviceIoControl.InputBufferLength >=
                sizeof(IPNotifyData)) {
            NotifyData = Irp->AssociatedIrp.SystemBuffer;
        } else {
            NotifyData = NULL;
        }

        // Now determine whether we should consider this IRP at all.
        // We'll normally complete all matching IRPs when an event occurs,
        // but certain clients want only one matching IRP to be completed,
        // so they can maintain a backlog of IRPs to make sure that they don't
        // miss any events. Such clients set 'Synchronization' as the version
        // in their requests.
        //
        // The client is skipped if one of its IRPs has already been moved
        // to the local completion list for this event.

        if (NotifyData &&
            NotifyData->Version == IPNotifySynchronization &&
            ChangeNotifyClientInQueue(IrpSp->FileObject, &LocalNotifyQueue)) {
            ListEntry = ListEntry->Flink;
            continue;
        }

        // If no data was passed or it contains NULL address or an Address that
        // matches the address that was added or deleted, complete the irp

        if ((NotifyData == NULL) ||
            (NotifyData->Add == 0) ||
            ((NotifyData->Add & Mask) == (Add & Mask))) {

            //
            // We are going to remove the LE, so first save the Flink
            //
            ListEntry = ListEntry->Flink;

            RemoveEntryList(&Irp->Tail.Overlay.ListEntry);

            // A NULL previous cancel routine is taken to mean that the
            // IRP's cancel routine has already been picked up by the I/O
            // manager (presumably it is running or about to run), so we
            // must wait for it before completing the IRP below.

            if (IoSetCancelRoutine(Irp, NULL) == NULL) {
                synchronizeWithCancelRoutine = TRUE;
            }

#if !MILLEN
            // Hand the event details back only if the client's output
            // buffer can hold them; otherwise complete with no data.
            if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength >=
                    sizeof(IPNotifyOutput)) {
                RtlCopyMemory(Irp->AssociatedIrp.SystemBuffer, NotifyOutput,
                              sizeof(IPNotifyOutput));
                Irp->IoStatus.Information = sizeof(IPNotifyOutput);
            } else {
                Irp->IoStatus.Information = 0;
            }
#else // !MILLEN
            // For Millennium, this is only called for RtChange queues now.
            //
            ASSERT(NotifyQueue == &RtChangeNotifyQueue);
            if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength >=
                    sizeof(IP_RTCHANGE_NOTIFY)) {
                PIP_RTCHANGE_NOTIFY pReply = Irp->AssociatedIrp.SystemBuffer;
                pReply->Addr = Add;
                pReply->Mask = Mask;
                Irp->IoStatus.Information = sizeof(IP_RTCHANGE_NOTIFY);
            } else {
                Irp->IoStatus.Information = 0;
            }
#endif // MILLEN

            InsertTailList(&LocalNotifyQueue, &Irp->Tail.Overlay.ListEntry);
        } else {
            ListEntry = ListEntry->Flink;
        }
    }

    CTEFreeLock(Lock, LockHandle);

    // Complete the selected IRPs now that 'Lock' is no longer held.

    if (!IsListEmpty(&LocalNotifyQueue)) {
        if (synchronizeWithCancelRoutine) {
            // CancelNotify runs with the cancel spin-lock held and releases
            // it before returning, so acquiring and releasing the lock here
            // waits for any in-progress cancel routine to get past the
            // point where it still touches the IRP.
            IoAcquireCancelSpinLock(&LockHandle);
            IoReleaseCancelSpinLock(LockHandle);
        }
        do {
            ListEntry = RemoveHeadList(&LocalNotifyQueue);
            Irp = CONTAINING_RECORD(ListEntry, IRP, Tail.Overlay.ListEntry);
            Irp->IoStatus.Status = STATUS_SUCCESS;
            IoCompleteRequest(Irp, IO_NETWORK_INCREMENT);
        } while (!IsListEmpty(&LocalNotifyQueue));
    }
}

//* RtChangeNotifyCancel - cancels a route-change notification request.
//
//  Cancel routine for requests pending in the route-change notification
//  queue. It defers to the common cancelation handler, naming that queue
//  and the route table lock that protects it.
//
//  Entry:  DeviceObject    - not used
//          Irp             - the request being canceled
//
//  Returns: nothing.
//
void
RtChangeNotifyCancel(PDEVICE_OBJECT DeviceObject, PIRP Irp)
{
    PLIST_ENTRY Queue = &RtChangeNotifyQueue;

    CancelNotify(Irp, Queue, &RouteTableLock.Lock);
}

//* RtChangeNotifyCancelEx - cancels a route-change notification request.
//
//  Cancel routine for requests pending in the extended route-change
//  notification queue. It defers to the common cancelation handler, naming
//  that queue and the route table lock that protects it.
//
//  Entry:  DeviceObject    - not used
//          Irp             - the request being canceled
//
//  Returns: nothing.
//
void
RtChangeNotifyCancelEx(PDEVICE_OBJECT DeviceObject, PIRP Irp)
{
    PLIST_ENTRY Queue = &RtChangeNotifyQueueEx;

    CancelNotify(Irp, Queue, &RouteTableLock.Lock);
}

//* CancelNotify - cancels a change-notification request.
//
//  Common handler for cancelation of change-notification requests.
//  It searches for the given request in the given queue and, if found,
//  unlinks it and completes it with a cancelation status. A request that
//  is no longer in the queue is left alone; it is not completed here.
//
//  It is invoked with the I/O cancel spin-lock held by the caller,
//  and frees the cancel spin-lock before returning.
//
//  Entry:  Irp             - the I/O request packet for the request
//          NotifyQueue     - change-notification queue containing the request
//          Lock            - lock protecting 'NotifyQueue'.
//
//  Returns: nothing.
//
void
CancelNotify(PIRP Irp, PLIST_ENTRY NotifyQueue, PVOID Lock)
{
    CTELockHandle   QueueHandle;
    PLIST_ENTRY     Cursor;
    BOOLEAN         Dequeued = FALSE;

    // Look for the IRP's own list entry among the queued requests.
    CTEGetLock(Lock, &QueueHandle);

    Cursor = NotifyQueue->Flink;
    while (Cursor != NotifyQueue && !Dequeued) {
        if (Cursor != &Irp->Tail.Overlay.ListEntry) {
            Cursor = Cursor->Flink;
        } else {
            RemoveEntryList(&Irp->Tail.Overlay.ListEntry);
            Dequeued = TRUE;
        }
    }

    CTEFreeLock(Lock, QueueHandle);

    // The cancel spin-lock is released whether or not the IRP was found.
    IoReleaseCancelSpinLock(Irp->CancelIrql);

    if (!Dequeued) {
        return;
    }

    Irp->IoStatus.Information = 0;
    Irp->IoStatus.Status = STATUS_CANCELLED;
    IoCompleteRequest(Irp, IO_NETWORK_INCREMENT);
}

//* DeleteRoute - Delete a route from the routing table.
//
//  Called by upper layer or management code to delete a route from the
//  routing table. A route is uniquely identified by a (Destination, Mask,
//  FirstHop, Interface) tuple. If the route cannot be found an error is
//  returned. Otherwise the route is removed and cleaned up, and a
//  route-change notification is issued once the route table lock has been
//  released.
//
//  With RT_REFCOUNT the call only drops one reference; the route is
//  actually deleted when the reference count reaches zero.
//
//  Entry:  Destination     - Destination address for which route is being
//                            deleted.
//          Mask            - Mask for destination.
//          FirstHop        - First hop on way to Destination.
//                            -1 means route is local.
//          OutIF           - Outgoing interface for route.
//          Flags           - selects various semantics for deletion:
//                            RT_EXCLUDE_LOCAL, RT_REFCOUNT, RT_NO_NOTIFY.
//
//  Returns: Status of attempt to delete route.
//
IP_STATUS
DeleteRoute(IPAddr Destination, IPMask Mask, IPAddr FirstHop,
            Interface * OutIF, uint Flags)
{
    RouteTableEntry     *RTE;           // RTE being deleted.
    CTELockHandle       TableLock;      // Lock handle for table.
    UINT                retval;
    RouteTableEntry     *pOldBestRTE;
    RouteTableEntry     *pNewBestRTE;
    BOOLEAN             Removed = FALSE;
    IPRouteNotifyOutput RNO = {0};
    uint                MatchFlags;
    uint                RefCounted = Flags & RT_REFCOUNT;

    CTEGetLock(&RouteTableLock.Lock, &TableLock);

    KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
               "DeleteRoute: D = %08x, M = %08x, NH = %08x, IF = %08x\n",
               Destination, Mask, FirstHop, OutIF));

    MatchFlags = (Flags & RT_EXCLUDE_LOCAL)
                     ? (MATCH_FULL | MATCH_EXCLUDE_LOCAL)
                     : MATCH_FULL;

    if (!RefCounted) {

        // Plain delete: remove the route outright.
        retval = DelRoute(Destination, Mask, FirstHop, OutIF, MatchFlags,
                          &RTE, &pOldBestRTE, &pNewBestRTE);

    } else {
        RouteTableEntry *Ignored;

        // Reference-counted delete: find the route, drop one reference,
        // and only remove the route once nobody references it any more.
        RTE = FindSpecificRTE(Destination, Mask, FirstHop, OutIF, &Ignored,
                              FALSE);

        if (RTE == NULL) {
            retval = IP_BAD_ROUTE;
        } else {
            ASSERT(RTE->rte_refcnt > 0);
            RTE->rte_refcnt--;
            if (RTE->rte_refcnt) {
                retval = IP_SUCCESS;
            } else {
                retval = DelRoute(Destination, Mask, FirstHop, OutIF,
                                  MatchFlags, &RTE, &pOldBestRTE, &pNewBestRTE);
            }
        }
    }

    // A successful call that merely dropped a reference leaves the route
    // in place; everything else that succeeded removed it.
    if (retval == IP_SUCCESS && !(RefCounted && RTE->rte_refcnt)) {

        // Capture the details for the notification before the RTE is
        // cleaned up.
        RNO.irno_dest = RTE->rte_dest;
        RNO.irno_mask = RTE->rte_mask;
        RNO.irno_nexthop = GetNextHopForRTE(RTE);
        RNO.irno_proto = RTE->rte_proto;
        RNO.irno_ifindex = OutIF->if_index;
        RNO.irno_metric = RTE->rte_metric;
        RNO.irno_flags = IRNO_FLAG_DELETE;

        Removed = TRUE;
        CleanupP2MP_RTE(RTE);
        CleanupRTE(RTE);
    }

    CTEFreeLock(&RouteTableLock.Lock, TableLock);

#if FFP_SUPPORT
    FFPFlushRequired = TRUE;
#endif

    if (!Removed) {
        return retval;
    }

    // RT_NO_NOTIFY only suppresses the extended notification queue.
    if (!(Flags & RT_NO_NOTIFY)) {
        RtChangeNotifyEx(&RNO);
    }
    RtChangeNotify(&RNO);

    return retval;
}

//* DeleteRouteWithNoLock - utility routine called by DeleteDest
//
//  Called to remove a single route for a given destination.
//  It's assumed that this routine is called with the routing table lock held,
//  and that it doesn't release the route-table-lock as part of its operation.
//
//  The entry is validated first (sensible next hop and destination, not the
//  loopback interface, no broadcast addresses, a valid outgoing NTE on the
//  named interface), the first hop is derived from it, and the matching
//  route is then removed via DelRoute. On success a route-change
//  notification is queued through RtChangeNotify and the RTE is cleaned up.
//
//  Entry:  IRE         - describes the entry to be deleted
//          DeletedRTE  - contains a pointer to the deleted entry on output
//                        (NULL if nothing was deleted)
//          Flags       - selects various semantics for deletion; only
//                        RT_EXCLUDE_LOCAL is examined here.
//
//  Returns: IP_SUCCESS if the entry to be deleted was found,
//           IP_BAD_REQ otherwise.
//
IP_STATUS
DeleteRouteWithNoLock(IPRouteEntry * IRE, RouteTableEntry **DeletedRTE,
                      uint Flags)
{
    NetTableEntry       *OutNTE, *LocalNTE, *TempNTE;
    IPAddr              FirstHop, Dest, NextHop;
    Interface           *OutIF;
    uint                Status;
    uint                i;
    RouteTableEntry     *RTE, *RTE1, *RTE2;
    IPRouteNotifyOutput RNO = {0};
    uint                MatchFlags = MATCH_FULL;

    *DeletedRTE = NULL;
    OutNTE = NULL;
    LocalNTE = NULL;

    Dest = IRE->ire_dest;
    NextHop = IRE->ire_nexthop;

    // Make sure that the nexthop is sensible. We don't allow nexthops
    // to be broadcast or invalid or loopback addresses.
    if (IP_LOOPBACK(NextHop) || CLASSD_ADDR(NextHop) || CLASSE_ADDR(NextHop))
        return IP_BAD_REQ;

    // Also make sure that the destination we're routing to is sensible.
    // Don't allow routes to be added to Class D or E or loopback
    // addresses.
    if (IP_LOOPBACK(Dest) || CLASSD_ADDR(Dest) || CLASSE_ADDR(Dest))
        return IP_BAD_REQ;

    if (IRE->ire_index == LoopIndex)
        return IP_BAD_REQ;

    if (IRE->ire_index != INVALID_IF_INDEX) {

        // First thing to do is to find the outgoing NTE for specified
        // interface, and also make sure that it matches the destination
        // if the destination is one of my addresses.

        for (i = 0; i < NET_TABLE_SIZE; i++) {
            NetTableEntry *NetTableList = NewNetTableList[i];
            for (TempNTE = NetTableList; TempNTE != NULL;
                 TempNTE = TempNTE->nte_next) {
                // The first valid NTE on the requested interface becomes
                // the outgoing NTE.
                if ((OutNTE == NULL) &&
                    (TempNTE->nte_flags & NTE_VALID) &&
                    (IRE->ire_index == TempNTE->nte_if->if_index))
                    OutNTE = TempNTE;
                if (!IP_ADDR_EQUAL(NextHop, NULL_IP_ADDR) &&
                    IP_ADDR_EQUAL(NextHop, TempNTE->nte_addr) &&
                    (TempNTE->nte_flags & NTE_VALID))
                    LocalNTE = TempNTE;

                // Don't let a route be set through a broadcast address.
                // This routine returns IP_STATUS values, so the failure is
                // reported as IP_BAD_REQ like every other rejection here
                // rather than as an NTSTATUS code.
                if (IsBCastOnNTE(NextHop, TempNTE) != DEST_LOCAL)
                    return IP_BAD_REQ;

                // Don't let a route to a broadcast address be added or deleted.
                if (IsBCastOnNTE(Dest, TempNTE) != DEST_LOCAL)
                    return IP_BAD_REQ;
            }
        }

        // At this point OutNTE points to the outgoing NTE, and LocalNTE
        // points to the NTE for the local address, if this is a direct route.
        // Make sure they point to the same interface, and that the type is
        // reasonable.
        if (OutNTE == NULL)
            return IP_BAD_REQ;

        if (LocalNTE != NULL) {
            // He's routing straight out a local interface. The interface for
            // the local address must match the interface passed in, and the
            // type must be DIRECT (if we're adding) or INVALID (if we're
            // deleting).
            if (LocalNTE->nte_if->if_index != IRE->ire_index)
                return IP_BAD_REQ;

            if (IRE->ire_type != IRE_TYPE_DIRECT &&
                IRE->ire_type != IRE_TYPE_INVALID)
                return IP_BAD_REQ;
            OutNTE = LocalNTE;
        }
        // Figure out what the first hop should be. If he's routing straight
        // through a local interface, or the next hop is equal to the
        // destination, then the first hop is IPADDR_LOCAL. Otherwise it's the
        // address of the gateway.
        if ((LocalNTE != NULL) || IP_ADDR_EQUAL(NextHop, NULL_IP_ADDR))
            FirstHop = IPADDR_LOCAL;
        else if (IP_ADDR_EQUAL(Dest, NextHop))
            FirstHop = IPADDR_LOCAL;
        else
            FirstHop = NextHop;

        OutIF = OutNTE->nte_if;

        // A null next hop is only accepted on a point-to-point interface.
        if (IP_ADDR_EQUAL(NextHop, NULL_IP_ADDR)) {

            if (!(OutIF->if_flags & IF_FLAGS_P2P)) {

                return IP_BAD_REQ;
            }
        }

    } else {
        // No interface index was given: the route lives on the dummy
        // interface.
        OutIF = (Interface *) & DummyInterface;
        if (IP_ADDR_EQUAL(Dest, NextHop))
            FirstHop = IPADDR_LOCAL;
        else
            FirstHop = NextHop;
    }

    // Addresses and masks are 32-bit values and are printed with %08x;
    // only the interface pointer is printed with %p.
    KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Calling DelRoute On :\n"));
    KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"\tDest = %08x\n", Dest));
    KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
               "\tMask = %08x\n", IRE->ire_mask));
    KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"\tIntf = %p\n", OutIF));
    KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
               "\tNhop = %08x\n\n", FirstHop));

    if (Flags & RT_EXCLUDE_LOCAL) {
        MatchFlags |= MATCH_EXCLUDE_LOCAL;
    }

    Status = DelRoute(Dest, IRE->ire_mask, FirstHop, OutIF, MatchFlags,
                      &RTE, &RTE1, &RTE2);
    KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Status = %08x\n", Status));

    if (Status == IP_SUCCESS) {

        // Queue a route-change notification for the destination-removal.
        //
        // N.B. We are being called with the route-table-lock held;
        // this means we're at DISPATCH_LEVEL, and so the call below
        // to RtChangeNotify will schedule a deferred notification.
        // It definitely *must* not attempt to recursively acquire
        // the route-table-lock, since that would instantly deadlock.

        RNO.irno_dest = RTE->rte_dest;
        RNO.irno_mask = RTE->rte_mask;
        RNO.irno_nexthop = GetNextHopForRTE(RTE);
        RNO.irno_proto = RTE->rte_proto;
        RNO.irno_ifindex = OutIF->if_index;
        RNO.irno_metric = RTE->rte_metric;
        RNO.irno_flags = IRNO_FLAG_DELETE;
        RtChangeNotify(&RNO);

        CleanupP2MP_RTE(RTE);
        CleanupRTE(RTE);
        *DeletedRTE = RTE;
        return IP_SUCCESS;
    }

    // Any DelRoute failure is reported to the caller as IP_BAD_REQ.
    return IP_BAD_REQ;
}

//* DeleteDest - delete all routes to a destination
//
//  Called to remove all routes to a given destination. This results
//  in the entry for the destination itself being removed.
//
//  The route table lock is taken for the whole operation. The first
//  route for (Dest, Mask) is located with SearchRouteInSTrie, and the
//  chain hanging off it is then walked, deleting each entry through
//  DeleteRouteWithNoLock with RT_EXCLUDE_LOCAL set.
//
//  Entry:  Dest    - identifies the destination to be removed
//          Mask    - supplies the mask for the destination
//
//  Returns: IP_SUCCESS if the destination was found; otherwise the
//           status returned by the lookup.
//
IP_STATUS
DeleteDest(IPAddr Dest, IPMask Mask)
{
    CTELockHandle   TableLock;
    RouteTableEntry *RTE, *NextRTE, *DeletedRTE;
    IP_STATUS       retval;
    IPRouteEntry    IRE;
    BOOLEAN         DeleteDone = FALSE;

    CTEGetLock(&RouteTableLock.Lock, &TableLock);

    do {
        // Begin by locating the first entry for the destination in question.
        // Once we find that, we'll use it to begin a loop in which all the
        // entries for the destination will be deleted.

        retval = SearchRouteInSTrie(RouteTable->sTrie, Dest, Mask, 0, NULL,
                                    MATCH_NONE, &RTE);

        if (retval != IP_SUCCESS) {
            break;
        }

        // Iteratively remove all routes on the destination.
        // Initialize the fields that are common to all the destination's
        // routes, and then iterate over the routes removing each one.

        IRE.ire_type = IRE_TYPE_INVALID;
        IRE.ire_dest = Dest;
        IRE.ire_mask = Mask;

        do {
            // Set the fields which are specific to the current entry
            // for the destination (the interface index and nexthop),
            // and pick up the entry *after* this entry (since we're about
            // to delete this entry) so we can continue our enumeration
            // once the current entry is removed.

            IRE.ire_index = RTE->rte_if->if_index;
            IRE.ire_nexthop = GetNextHopForRTE(RTE);

            NextRTE = RTE->rte_next;

            KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                       "Deleting RTE @ %p:\n", RTE));
            KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                       "Next in List = %p:\n", NextRTE));
            // Pass the address of the IRE: handing the structure itself
            // to a %p conversion is not a valid variadic argument.
            KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                       "Using an IRE @ %p\n", &IRE));
            KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                       "\tDest = %08x\n", IRE.ire_dest));
            KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                       "\tMask = %08x\n", IRE.ire_mask));
            KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                       "\tIntf = %08x\n", IRE.ire_index));
            KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                       "\tNhop = %08x\n\n", IRE.ire_nexthop));

            // Delete the current entry. The deletion routine
            // takes care of notification, if any.
            //
            // DeletedRTE is only written by the deletion routine on
            // success, so reset it first; otherwise the trace below
            // would print an indeterminate pointer on failure.

            DeletedRTE = NULL;
            retval = DeleteRouteWithNoLock(&IRE, &DeletedRTE, RT_EXCLUDE_LOCAL);
            if (retval == IP_SUCCESS) {
                DeleteDone = TRUE;
            }

            KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                       "Status = %08x, RTE = %p, DeletedRTE = %p\n",
                       retval, RTE, DeletedRTE));

            // Attempt to continue the enumeration by picking up
            // the next entry.

            if ((retval != IP_SUCCESS) || (RTE == DeletedRTE)) {

                // Either we are not allowed to delete this route
                // Or we deleted what we were expecting to delete

                RTE = NextRTE;
            } else {

                // We deleted an RTE that's further down the list.
                // NextRTE might be pointing to this deleted RTE, so
                // stay on the current RTE and try to delete again; it
                // is skipped on a later pass if it cannot be deleted.
            }
        } while (RTE);

        // The destination was found, so report success even if some of
        // its routes could not be removed.
        retval = IP_SUCCESS;
    } while (FALSE);

    CTEFreeLock(&RouteTableLock.Lock, TableLock);

    if (DeleteDone) {
#if FFP_SUPPORT
        FFPFlushRequired = TRUE;
#endif
    }

    return retval;
}

//* Redirect - Process a redirect request.
//
//  Handler for received redirects. As required by the host requirements
//  RFC, every redirect is treated as a host redirect. The proposed first
//  hop is sanity checked, and the redirect is ignored unless it came from
//  the gateway currently used to reach the target. If the current route is
//  a default route and the proposed first hop is a default gateway that can
//  be revalidated, the RCEs on the current route are invalidated and nothing
//  else is done. Otherwise a host route to the target through the new first
//  hop is added, and the host route through the redirecting gateway is
//  deleted.
//
//  Entry:  NTE         - Pointer to NetTableEntry for net on which Redirect
//                        arrived.
//          RDSrc       - IPAddress of source of redirect.
//          Target      - IPAddress being redirected.
//          Src         - Src IP address of DG that triggered RD.
//          FirstHop    - New first hop for Target.
//
//  Returns: Nothing.
//
void
Redirect(NetTableEntry * NTE, IPAddr RDSrc, IPAddr Target, IPAddr Src,
         IPAddr FirstHop)
{
    IPRouteNotifyOutput Notify = {0};
    RouteTableEntry     *RTE;
    CTELockHandle       LockHandle;
    IP_STATUS           AddStatus;
    IPAddr              NewNextHop;
    uint                HopMTU;

    // The new first hop must be a real, non-loopback address other than
    // the redirecting gateway, and the receiving NTE must be valid.
    if (IP_ADDR_EQUAL(FirstHop, NULL_IP_ADDR)) {
        return;
    }
    if (IP_LOOPBACK(FirstHop)) {
        return;
    }
    if (IP_ADDR_EQUAL(FirstHop, RDSrc)) {
        return;
    }
    if (!(NTE->nte_flags & NTE_VALID)) {
        return;
    }

    // One of our own addresses is never an acceptable first hop.
    if (GetAddrType(FirstHop) == DEST_LOCAL) {
        KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                   "Redirect: Local firsthop %x\n", FirstHop));
        return;
    }

    // Drop redirects received on the loopback interface. This can happen
    // with NAT, when it sends a packet to an address in its local pool:
    // such addresses are local but not bound to any interface, and IP
    // doesn't know about them.
    if (NTE == LoopNTE) {
        return;
    }

    ASSERT((NTE->nte_if->if_promiscuousmode) ||
           ((!NTE->nte_if->if_promiscuousmode) &&
            IP_ADDR_EQUAL(NTE->nte_addr, Src)));

    // Everything from here to the route addition happens under the route
    // table lock; every rejection path leaves through UnlockAndExit.

    CTEGetLock(&RouteTableLock.Lock, &LockHandle);

    // The redirect is only believable if it was sent by the gateway we
    // are currently using as first hop for Target.
    RTE = LookupRTE(Target, Src, HOST_ROUTE_PRI, FALSE);
    if (RTE == NULL) {
        goto UnlockAndExit;
    }
    if (IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL) ||
        !IP_ADDR_EQUAL(RTE->rte_addr, RDSrc)) {
        goto UnlockAndExit;
    }
    ASSERT(RTE->rte_flags & RTE_IF_VALID);

    // When the current route is a default route, see whether FirstHop is
    // a default gateway that is down. If one was newly activated, it is
    // enough to invalidate the RCEs on the current route.
    if (RTE->rte_mask == DEFAULT_MASK && ValidateDefaultGWs(FirstHop) != 0) {
        InvalidateRCEChain(RTE);
        goto UnlockAndExit;
    }

    // A host route through FirstHop is needed; look up how we would
    // reach FirstHop itself.
    RTE = LookupRTE(FirstHop, Src, HOST_ROUTE_PRI, FALSE);
    if (RTE == NULL) {
        goto UnlockAndExit;
    }
    ASSERT(RTE->rte_flags & RTE_IF_VALID);

    // FirstHop has to be directly reachable ...
    if (!IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL)) {
        goto UnlockAndExit;
    }
    // ... and on the subnet of the NTE the redirect arrived on.
    if ((NTE->nte_addr & NTE->nte_mask) != (FirstHop & NTE->nte_mask)) {
        goto UnlockAndExit;
    }

    // Prefer the link MTU when the route to FirstHop has a link.
    HopMTU = RTE->rte_link ? RTE->rte_link->link_mtu : RTE->rte_mtu;

    // A redirect that names the target itself as first hop means the
    // target is on-link.
    NewNextHop = IP_ADDR_EQUAL(FirstHop, Target) ? IPADDR_LOCAL : FirstHop;

    // Add the host route. The subnet check above makes it valid to route
    // to FirstHop out through NTE's interface.
    AddStatus = LockedAddRoute(Target, HOST_MASK, NewNextHop,
                               NTE->nte_if, HopMTU, 1, IRE_PROTO_ICMP,
                               ATYPE_OVERRIDE, RTE->rte_context, FALSE,
                               &Notify);

    CTEFreeLock(&RouteTableLock.Lock, LockHandle);

    if (AddStatus == IP_SUCCESS && Notify.irno_ifindex) {
        RtChangeNotifyEx(&Notify);
        RtChangeNotify(&Notify);
    }

    //
    // Bug: #67333: delete the old route thru' RDSrc, now that we have a
    // new one.
    //
    DeleteRoute(Target, HOST_MASK, RDSrc, NTE->nte_if, 0);
    return;

UnlockAndExit:
    CTEFreeLock(&RouteTableLock.Lock, LockHandle);
}

//* GetRaisedMTU - Get the next largest MTU in table.
//
//  Walks MTUTable from its last entry towards its first and stops at the
//  first entry that is strictly greater than the MTU currently in use.
//  If no such entry is found, the first entry of the table is returned.
//
//  Input:  PrevMTU - MTU we're currently using. We want the next largest one.
//
//  Returns: New MTU size.
//
uint
GetRaisedMTU(uint PrevMTU)
{
    uint Index = (sizeof(MTUTable) / sizeof(uint)) - 1;

    // Index 0 is never tested; it is the fallback result.
    while (Index != 0 && MTUTable[Index] <= PrevMTU) {
        Index--;
    }

    return MTUTable[Index];
}

//* GuessNewMTU - Guess a new MTU, given a DG size too big.
//
//  Walks MTUTable from its first entry towards its last and stops at the
//  first entry that is strictly smaller than the size believed to be too
//  large. If no such entry is found, the last entry of the table is
//  returned.
//
//  Input:  TooBig      - Size that's too big.
//
//  Returns: New MTU size.
//
uint
GuessNewMTU(uint TooBig)
{
    uint Index = 0;

    // The last entry is never tested; it is the fallback result.
    while (Index < ((sizeof(MTUTable) / sizeof(uint)) - 1) &&
           MTUTable[Index] >= TooBig) {
        Index++;
    }

    return MTUTable[Index];
}

//* RouteFragNeeded - Handle being told we need to fragment.
//
//  Called on an external indication that datagrams along some path need
//  to be fragmented. Nothing is done unless PMTU discovery is enabled and
//  the destination is remote. Otherwise a new MTU is worked out (guessed
//  from the MTU table when the caller supplies 0), recorded as a host
//  route when it differs from the current one, and reported to the upper
//  layers.
//
//  Input:  IPH     - Pointer to IP Header of datagram needing
//                    fragmentation.
//          NewMTU  - New MTU to be used (may be 0).
//
//  Returns: Nothing.
//
void
RouteFragNeeded(IPHeader UNALIGNED * IPH, ushort NewMTU)
{
    IPRouteNotifyOutput Notify = {0};
    RouteTableEntry     *RTE;
    CTELockHandle       LockHandle;
    IP_STATUS           AddStatus = IP_GENERAL_FAILURE;
    ushort              HeaderLength;
    ushort              RouteMTU;
    uint                OldMTU;

    // Without PMTU discovery there is nothing to do.
    if (!PMTUDiscovery) {
        return;
    }

    // Only messages about remote destinations are acceptable.
    if (GetAddrType(IPH->iph_dest) != DEST_REMOTE) {
        KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                   "RouteFragNeeded: non-remote dest %x\n", IPH->iph_dest));
        return;
    }

    // MTUs are tracked without the IP header, so a supplied MTU is first
    // clamped up to the minimum valid size and then reduced by the header
    // size.
    if (NewMTU != 0) {
        NewMTU = MAX(NewMTU, MIN_VALID_MTU);
        NewMTU -= sizeof(IPHeader);
    }
    HeaderLength = (IPH->iph_verlen & (uchar) ~ IP_VER_FLAG) << 2;

    CTEGetLock(&RouteTableLock.Lock, &LockHandle);

    // Without a route to the destination there is nothing to update.
    RTE = LookupRTE(IPH->iph_dest, IPH->iph_src, HOST_ROUTE_PRI, FALSE);
    if (RTE == NULL) {
        CTEFreeLock(&RouteTableLock.Lock, LockHandle);
        return;
    }

    // The MTU in effect comes from the link when the route has one.
    RouteMTU = (ushort) (RTE->rte_link ? RTE->rte_link->link_mtu
                                       : RTE->rte_mtu);
    OldMTU = RouteMTU;

    // A new MTU larger than the one in use is not believable.
    if (OldMTU < NewMTU) {
        CTEFreeLock(&RouteTableLock.Lock, LockHandle);
        return;
    }

    if (NewMTU == 0) {
        ushort DGLength;

        // No MTU was supplied, so make a best guess from the size of the
        // datagram that triggered the message, again excluding the IP
        // header.
        DGLength = (ushort) net_short(IPH->iph_length) - sizeof(IPHeader);

        // RFC 1191 correction for old style routers: if the reported
        // length is not below the current estimate, assume the header
        // length was wrongly included and take it off.
        if (DGLength >= OldMTU) {
            DGLength -= HeaderLength;
        }

        // If the length is still above the current MTU, keep the current
        // MTU. A burst of packets can produce a run of ICMP messages; the
        // first lowers the MTU and the rest must not lower it further.
        // Otherwise step down to the next lower table entry.
        NewMTU = (DGLength > OldMTU) ? (ushort) OldMTU
                                     : (ushort) GuessNewMTU(DGLength);
    }

    // Record a changed MTU as a host route. The ICMP protocol type is
    // only used when a new host route is being created; when the route
    // found is already for this exact destination its own protocol is
    // kept, so that an existing static entry is not overwritten and later
    // timed out as though it were an ICMP route.
    if (NewMTU != OldMTU) {
        AddStatus = LockedAddRoute(IPH->iph_dest, HOST_MASK, RTE->rte_addr,
                                   RTE->rte_if, NewMTU, RTE->rte_metric,
                                   IP_ADDR_EQUAL(RTE->rte_dest, IPH->iph_dest)
                                       ? RTE->rte_proto : IRE_PROTO_ICMP,
                                   ATYPE_OVERRIDE, RTE->rte_context, FALSE,
                                   &Notify);
    }

    CTEFreeLock(&RouteTableLock.Lock, LockHandle);

    // Tell the upper layers about the MTU. This happens whether or not a
    // route was added above.
    ULMTUNotify(IPH->iph_dest, IPH->iph_src, IPH->iph_protocol,
                (void *)((uchar *) IPH + HeaderLength), NewMTU);

    if (AddStatus == IP_SUCCESS && Notify.irno_ifindex) {
        RtChangeNotifyEx(&Notify);
        RtChangeNotify(&Notify);
    }
}

//** IPRouteTimeout - IP routeing timeout handler.
//
//  The IP routeing timeout routine, called once a minute. We look at all
//  host routes, and if we raise the MTU on them we do so.
//
//  Entry:  Timer       - Timer being fired.
//          Context     - Pointer to NTE being time out.
//
//  Returns: Nothing.
//
void
IPRouteTimeout(CTEEvent * Timer, void *Context)
{
    uint            Now = CTESystemUpTime() / 1000L;
    CTELockHandle   Handle;
    uint            i;
    RouteTableEntry *RTE, *PrevRTE;
    uint            RaiseMTU, Delta;
    Interface       *IF;
    IPAddr          Dest;
    uint            NewMTU;
    NetTableEntry   *NTE;
    RouteTableEntry *pOldBestRTE, *pNewBestRTE;
    UINT            IsDataLeft, IsValid;
    UCHAR           IteratorContext[CONTEXT_SIZE];
    uint mtu;
    RtChangeList    *CurrentRtChangeList = NULL;

    DampCheck();

    if ((CTEInterlockedIncrementLong(&RouteTimerTicks) * IP_ROUTE_TIMEOUT) == IP_RTABL_TIMEOUT) {
        RouteTimerTicks = 0;

        CTEGetLock(&RouteTableLock.Lock, &Handle);

        // First we set up an iterator over all routes
        RtlZeroMemory(IteratorContext, CONTEXT_SIZE);

        // Do we have any routes at all in the table ?
        IsDataLeft = RTValidateContext(IteratorContext, &IsValid);

        PrevRTE = NULL;

        while (IsDataLeft) {
            // Advance context by getting the next route
            IsDataLeft = GetNextRoute(IteratorContext, &RTE);

            // Do we have to delete the previous route ?
            if (PrevRTE != NULL) {
                IPRouteNotifyOutput RNO = {0};
                RtChangeList        *NewRtChange;

                // Retrieve information about the route for change-notification
                // before proceeding with deletion.

                RNO.irno_dest = PrevRTE->rte_dest;
                RNO.irno_mask = PrevRTE->rte_mask;
                RNO.irno_nexthop = GetNextHopForRTE(PrevRTE);
                RNO.irno_proto = PrevRTE->rte_proto;
                RNO.irno_ifindex = PrevRTE->rte_if->if_index;
                RNO.irno_metric = PrevRTE->rte_metric;
                RNO.irno_flags = IRNO_FLAG_DELETE;

                DelRoute(PrevRTE->rte_dest, PrevRTE->rte_mask,
                         PrevRTE->rte_addr, PrevRTE->rte_if, MATCH_FULL,
                         &PrevRTE, &pOldBestRTE, &pNewBestRTE);

                CleanupP2MP_RTE(PrevRTE);
                CleanupRTE(PrevRTE);

                //... so we don't delete same route again
                PrevRTE = NULL;

                // Allocate, initialize and queue a change-notification entry
                // for the deleted route.

                NewRtChange = CTEAllocMemNBoot(sizeof(RtChangeList), 'XICT');
                if (NewRtChange != NULL) {
                    NewRtChange->rt_next = CurrentRtChangeList;
                    NewRtChange->rt_info = RNO;
                    CurrentRtChangeList = NewRtChange;
                }
            }
            // Make sure this route is a valid host route
            if (!(RTE->rte_flags & RTE_VALID))
                continue;

            if (RTE->rte_mask != HOST_MASK)
                continue;

            // We have valid host route here

            if (PMTUDiscovery) {
                // Check to see if we can raise the MTU on this guy.
                Delta = Now - RTE->rte_mtuchange;

                if (RTE->rte_flags & RTE_INCREASE)
                    RaiseMTU = (Delta >= MTU_INCREASE_TIME ? 1 : 0);
                else
                    RaiseMTU = (Delta >= MTU_DECREASE_TIME ? 1 : 0);

                if (RaiseMTU) {
                    // We need to raise this MTU. Set his change time to
                    // Now, so we don't do this again, and figure out
                    // what the new MTU should be.
                    RTE->rte_mtuchange = Now;
                    IF = RTE->rte_if;
                    if (RTE->rte_mtu < IF->if_mtu) {
                        uint RaisedMTU;

                        RTE->rte_flags |= RTE_INCREASE;
                        // This is a candidate for change. Figure out
                        // what it should be.
                        RaisedMTU = GetRaisedMTU(RTE->rte_mtu);
                        NewMTU = MIN(RaisedMTU,
                                     IF->if_mtu);
                        RTE->rte_mtu = NewMTU;
                        Dest = RTE->rte_dest;

                        // We have the new MTU. Free the lock, and walk
                        // down the NTEs on the I/F. For each NTE,
                        // call up to the upper layer and tell him what
                        // his new MTU is.
                        CTEFreeLock(&RouteTableLock.Lock, Handle);
                        NTE = IF->if_nte;
                        while (NTE != NULL) {
                            if (NTE->nte_flags & NTE_VALID) {
                                ULMTUNotify(Dest, NTE->nte_addr, 0, NULL,
                                            MIN(NewMTU, NTE->nte_mss));
                            }
                            NTE = NTE->nte_ifnext;
                        }

                        // We've notified everyone. Get the lock again,
                        // and validate context in case something changed
                        // after we freed the lock. In case it's invalid,
                        // start from first. We've updated the mtuchange
                        // time of this RTE, so we won't hit him again.
                        CTEGetLock(&RouteTableLock.Lock, &Handle);

                        RTValidateContext(IteratorContext, &IsValid);

                        if (!IsValid) {
                            RtlZeroMemory(IteratorContext, CONTEXT_SIZE);

                            IsDataLeft = RTValidateContext(IteratorContext, &IsValid);

                            continue;
                        }
                        // We still have a valid iterator context here
                    } else {
                        RTE->rte_flags &= ~RTE_INCREASE;
                    }
                }
            }

            // If this route came in via ICMP, and we have no RCEs on it,
            // and it's at least 10 minutes old, delete it.
            if (RTE->rte_proto == IRE_PROTO_ICMP &&
                RTE->rte_rcelist == NULL &&
                (Now - RTE->rte_valid) > MAX_ICMP_ROUTE_VALID) {
                // He needs to be deleted. Call DelRoute to do this.
                // But after you have updated the context to next RTE

                // Route for deletion in next iteration
                PrevRTE = RTE;
                continue;
            }
        }

        // Did we have to delete the previous route ?
        if (PrevRTE != NULL) {

            IPRouteNotifyOutput RNO = {0};
            RtChangeList        *NewRtChange;

            // Retrieve information about the route for change-notification
            // before proceeding with deletion.

            RNO.irno_dest = PrevRTE->rte_dest;
            RNO.irno_mask = PrevRTE->rte_mask;
            RNO.irno_nexthop = GetNextHopForRTE(PrevRTE);
            RNO.irno_proto = PrevRTE->rte_proto;
            RNO.irno_ifindex = PrevRTE->rte_if->if_index;
            RNO.irno_metric = PrevRTE->rte_metric;
            RNO.irno_flags = IRNO_FLAG_DELETE;

            // Delete the route and perform cleanup.

            DelRoute(PrevRTE->rte_dest, PrevRTE->rte_mask, PrevRTE->rte_addr,
                     PrevRTE->rte_if, MATCH_FULL, &PrevRTE, &pOldBestRTE,
                     &pNewBestRTE);

            CleanupP2MP_RTE(PrevRTE);
            CleanupRTE(PrevRTE);

            // Allocate, initialize and queue a change-notification entry
            // for the deleted route.

            NewRtChange = CTEAllocMemNBoot(sizeof(RtChangeList), 'DiCT');
            if (NewRtChange != NULL) {
                NewRtChange->rt_next = CurrentRtChangeList;
                NewRtChange->rt_info = RNO;
                CurrentRtChangeList = NewRtChange;
            }
        }

        CTEFreeLock(&RouteTableLock.Lock, Handle);
    }
#if FFP_SUPPORT
    if (FFPFlushRequired) {
        FFPFlushRequired = FALSE;
        IPFlushFFPCaches();
    }
#endif

    if ((CTEInterlockedIncrementLong(&FlushIFTimerTicks) * IP_ROUTE_TIMEOUT) == FLUSH_IFLIST_TIMEOUT) {
        Interface *TmpIF;
        RouteCacheEntry *RCE, *PrevRCE;

        FlushIFTimerTicks = 0;

        CTEGetLock(&RouteTableLock.Lock, &Handle);

        // check whether FreeIFList is non empty
        if (FrontFreeList) {
            ASSERT(*(int *)&TotalFreeInterfaces > 0);
            // free the first interface in the list
            TmpIF = FrontFreeList;
            FrontFreeList = FrontFreeList->if_next;
            CTEFreeMem(TmpIF);
            TotalFreeInterfaces--;

            // check whether the list became empty
            if (FrontFreeList == NULL) {
                RearFreeList = NULL;
                ASSERT(TotalFreeInterfaces == 0);
            }
        }
        // use the same timer to scan the RCEFreeList

        PrevRCE = STRUCT_OF(RouteCacheEntry, &RCEFreeList, rce_next);
        RCE = RCEFreeList;

        while (RCE) {
            if (RCE->rce_usecnt == 0) {
                RouteCacheEntry *nextRCE;
                // time to free this RCE
                // remove it from the list
                PrevRCE->rce_next = RCE->rce_next;
                if (RCE->rce_flags & RCE_REFERENCED) {
                    // IF is ref'd so it better be in the IFList
                    LockedDerefIF((Interface *) RCE->rce_rte);
                }
                nextRCE = RCE->rce_next;
                CTEFreeMem(RCE);
                RCE = nextRCE;
            } else {
                PrevRCE = RCE;
                RCE = RCE->rce_next;
            }
        }

        CTEFreeLock(&RouteTableLock.Lock, Handle);
    }

    // Call RtChangeNotify for each of the entries in the change-notification
    // list that we've built up so far. In the process, free each entry.

    if (CurrentRtChangeList) {
        RtChangeList    *TmpRtChangeList;

        do {
            TmpRtChangeList = CurrentRtChangeList->rt_next;
            RtChangeNotify(&CurrentRtChangeList->rt_info);
            CTEFreeMem(CurrentRtChangeList);
            CurrentRtChangeList = TmpRtChangeList;
        } while(CurrentRtChangeList);
    }

    // If the driver is unloading, dont restart the timer

    if (fRouteTimerStopping) {
        CTESignal(&TcpipUnloadBlock, NDIS_STATUS_SUCCESS);
    } else {
        CTEStartTimer(&IPRouteTimer, IP_ROUTE_TIMEOUT, IPRouteTimeout, NULL);
    }
}

//* FreeFWPacket - Free a forwarding packet to its pool.
//
//  Releases everything still attached to the packet's forwarding context
//  (data buffers, option storage, and the link and interface references),
//  reinitializes the packet and hands it to FwPacketFree.
//
//  Input:  Packet - Packet to be freed.
//
//  Returns: nothing.
//
void
FreeFWPacket(PNDIS_PACKET Packet)
{
    FWContext *FWC = (FWContext *)Packet->ProtocolReserved;
    PNDIS_BUFFER Mdl, NextMdl;

    ASSERT(FWC->fc_pc.pc_common.pc_IpsecCtx == NULL);

    // Free each buffer on the chain. The successor is read before the
    // buffer is freed.
    if (FWC->fc_buffhead != NULL) {
        for (Mdl = FWC->fc_buffhead; Mdl != NULL; Mdl = NextMdl) {
            NextMdl = Mdl->Next;
            MdpFree(Mdl);
        }
        FWC->fc_buffhead = NULL;
    }

    // Drop any saved options, along with the flag that advertises them.
    if (FWC->fc_options != NULL) {
        CTEFreeMem(FWC->fc_options);
        FWC->fc_options = NULL;
        FWC->fc_optlength = 0;
        FWC->fc_pc.pc_common.pc_flags &= ~PACKET_FLAG_OPTIONS;
    }

    // Release the references on the outgoing link and interface.
    if (FWC->fc_iflink != NULL) {
        DerefLink(FWC->fc_iflink);
        FWC->fc_iflink = NULL;
    }

    if (FWC->fc_if != NULL) {
        DerefIF(FWC->fc_if);
        FWC->fc_if = NULL;
    }

    NdisReinitializePacket(Packet);
#if MCAST_BUG_TRACKING
    FWC->fc_pc.pc_common.pc_owner = 0;
#endif

    FwPacketFree(Packet);
}

//* FWSendComplete  - Complete the transmission of a forwarded packet.
//
//  This is called when the send of a forwarded packet is done. We'll free the
//  resources and get the next send going, if there is one. If there isn't,
//  we'll decrement the pending count.
//
//  When the forwarding context still records an owning packet (fc_bufown),
//  the buffer offset adjustment made on the fast path is undone, the owning
//  packet is returned via NdisReturnPackets, and the forwarding packet is
//  freed without touching any send queue. Multicast destinations also
//  bypass the send queues.
//
//  Input:  SendContext - The packet being completed (a PNDIS_PACKET).
//          Buffer      - Pointer to buffer chain being completed.
//          SendStatus  - Status of the send; only recorded when
//                        MCAST_BUG_TRACKING is enabled.
//
//  Returns: Nothing.
//
void
FWSendComplete(void *SendContext, PNDIS_BUFFER Buffer, IP_STATUS SendStatus)
{
    PNDIS_PACKET Packet = (PNDIS_PACKET) SendContext;
    FWContext *FWC = (FWContext *) Packet->ProtocolReserved;
    // Capture the MAC header size before anything else: with
    // MCAST_BUG_TRACKING enabled the field is reused just below to record
    // the send status, and the fast-path undo needs the original value.
    int MacHeaderSize = FWC->fc_MacHdrSize;
    RouteSendQ *RSQ;
    CTELockHandle Handle;
    FWQ *NewFWQ;
    PNDIS_PACKET NewPacket;

#if MCAST_BUG_TRACKING
    FWC->fc_MacHdrSize = SendStatus;
#endif

    if (Buffer && FWC->fc_bufown) {

        //Undo the offset manipulation
        //which was done in super fast path

        PNDIS_PACKET RtnPacket = FWC->fc_bufown;

        NdisAdjustBuffer(
            Buffer,
            (PCHAR) NdisBufferVirtualAddress(Buffer) - MacHeaderSize,
            NdisBufferLength(Buffer) + MacHeaderSize);

        // The buffer chain belongs to the packet being returned, so
        // detach it from the forwarding packet before that one is freed.
        Packet->Private.Head = NULL;
        Packet->Private.Tail = NULL;

        NdisReturnPackets(&RtnPacket, 1);

        FWC->fc_bufown = NULL;
#if MCAST_BUG_TRACKING
        FWC->fc_sos = __LINE__;
#endif

        FreeFWPacket(Packet);

        return;

    }

    // Pick the send queue this packet was charged against. It has to be
    // looked up before FreeFWPacket, which clears fc_if.
    if (!IS_BCAST_DEST(FWC->fc_dtype))
        RSQ = &((RouteInterface *) FWC->fc_if)->ri_q;
    else
        RSQ = BCastRSQ;

    if (IS_MCAST_DEST(FWC->fc_dtype)) {
        RSQ = NULL;
    }
#if MCAST_BUG_TRACKING
    FWC->fc_sos = __LINE__;
#endif

    FreeFWPacket(Packet);

    if (RSQ == NULL) {
        return;
    }
    CTEGetLock(&RSQ->rsq_lock, &Handle);
    ASSERT(RSQ->rsq_pending <= RSQ->rsq_maxpending);

    RSQ->rsq_pending--;

    ASSERT(*(int *)&RSQ->rsq_pending >= 0);

    if (RSQ->rsq_qlength != 0) {    // Have more to send.

        ASSERT(IPSecHandlerPtr == NULL);

        // Make sure we're not already running through this. If we are, quit.
        if (!RSQ->rsq_running) {

            // We could schedule this off for an event, but under NT that
            // could mean a context switch for every completing packet in the
            // normal case. For now, just do it in a loop guarded with
            // rsq_running.
            RSQ->rsq_running = TRUE;

            // Loop while we haven't hit our send limit and we still have
            // stuff to send.
            while (RSQ->rsq_pending < RSQ->rsq_maxpending &&
                   RSQ->rsq_qlength != 0) {

                ASSERT(RSQ->rsq_qh.fq_next != &RSQ->rsq_qh);

                // Pull one off the queue, and update qlength.
                NewFWQ = RSQ->rsq_qh.fq_next;
                RSQ->rsq_qh.fq_next = NewFWQ->fq_next;
                NewFWQ->fq_next->fq_prev = NewFWQ->fq_prev;
                RSQ->rsq_qlength--;

                // Update pending before we send. The queue lock is
                // dropped around the transmit and retaken afterwards.
                RSQ->rsq_pending++;
                CTEFreeLock(&RSQ->rsq_lock, Handle);
                NewPacket = PACKET_FROM_FWQ(NewFWQ);
                TransmitFWPacket(NewPacket,
                                 ((FWContext *) NewPacket->ProtocolReserved)->fc_datalength);
                CTEGetLock(&RSQ->rsq_lock, &Handle);
            }

            RSQ->rsq_running = FALSE;
        }
    }
    CTEFreeLock(&RSQ->rsq_lock, Handle);
}

//* TransmitFWPacket - Transmit a forwarded packet on a link.
//
//  Called when we know we can send a packet. We fix up the header, and send it.
//
//  The packet's existing buffer chain is detached and the forwarding
//  context's header buffer becomes the packet's only buffer. If an IPSec
//  handler is registered, the header checksum (including any options) is
//  computed here and the handler is invoked; any result other than eFORWARD
//  drops the packet. The packet is then handed to SendIPPacket, IPFragment
//  or SendIPBCast, depending on the destination type and on whether it fits
//  within fc_mtu.
//
//  Input:  Packet      - Packet to be sent. Its ProtocolReserved area is
//                        used as the FWContext describing the send.
//          DataLength  - Length of data. Presumably the payload length,
//                        excluding IP header and options (it is added to
//                        fc_optlength when compared against fc_mtu) -
//                        TODO confirm against callers.
//
//  Returns: Nothing. Every path that does not end with a send routine
//           returning IP_PENDING either completes the packet through
//           FWSendComplete or frees it through FreeIPPacket before
//           returning.
//
void
TransmitFWPacket(PNDIS_PACKET Packet, uint DataLength)
{
    FWContext *FC = (FWContext *) Packet->ProtocolReserved;
    PNDIS_BUFFER HBuffer, Buffer;
    IP_STATUS Status;
    PVOID VirtualAddress;
    UINT BufLen;
    ULONG ipsecByteCount = 0;
    ULONG ipsecMTU;
    ULONG ipsecFlags;
    IPHeader *IPH;
    ULONG len;
    IPAddr SrcAddr;
    PNDIS_BUFFER OptBuffer;
    PNDIS_BUFFER newBuf = NULL;
    IPHeader *pSaveIPH;
    // Scratch copy of the original IP header, with room for options and the
    // leading bytes of the transport header, used when building ICMP errors.
    UCHAR saveIPH[MAX_IP_HDR_SIZE + ICMP_HEADER_SIZE];
    ULONG hdrLen;
    void *ArpCtxt = NULL;

    //
    // Fix up the packet. Remove the existing buffer chain, and put our
    // header on the front.
    //
    // 'Buffer' keeps the detached data chain; from here on the packet itself
    // only describes the header buffer.
    //


    Buffer = Packet->Private.Head;
    HBuffer = FC->fc_hndisbuff;
    Packet->Private.Head = HBuffer;
    Packet->Private.Tail = HBuffer;
    NDIS_BUFFER_LINKAGE(HBuffer) = (PNDIS_BUFFER) NULL;
    Packet->Private.TotalLength = sizeof(IPHeader);
    Packet->Private.Count = 1;

    TcpipQueryBuffer(HBuffer, &VirtualAddress, &BufLen, NormalPagePriority);

    if (VirtualAddress == NULL) {
        // Could not get an address for the header buffer; drop the packet.
#if MCAST_BUG_TRACKING
        // Debug aid: fc_mtu is overwritten with the source line of the
        // completion site.
        FC->fc_mtu = __LINE__;
#endif
        FWSendComplete(Packet, Buffer, IP_SUCCESS);
        IPSInfo.ipsi_outdiscards++;
        return;
    }
    Packet->Private.PhysicalCount =
        ADDRESS_AND_SIZE_TO_SPAN_PAGES(VirtualAddress,
                                       sizeof(IPHeader));

    // NOTE(review): this queries the same buffer (HBuffer) as the call above,
    // so IPH is expected to match VirtualAddress - confirm whether the
    // second query is needed.
    TcpipQueryBuffer(HBuffer, (PVOID *) &IPH, &len, NormalPagePriority);

    if (IPH == NULL) {
#if MCAST_BUG_TRACKING
        FC->fc_mtu = __LINE__;
#endif
        FWSendComplete(Packet, Buffer, IP_SUCCESS);
        IPSInfo.ipsi_outdiscards++;
        return;
    }
    if (IPSecHandlerPtr) {
        //
        // See if IPSEC is enabled, see if it needs to do anything with this
        // packet - we need to construct the full IP header in the first MDL
        // before we call out to IPSEC.
        //
        IPSEC_ACTION Action;
        ulong csum;
        PUCHAR pTpt;
        ULONG tptLen;

        // Keep a copy of the fixed header so an ICMP error can still be
        // built after the packet has been completed.
        pSaveIPH = (IPHeader *) saveIPH;
        *pSaveIPH = *IPH;

        csum = xsum(IPH, sizeof(IPHeader));

        //
        // Link the header buffer to the options buffer before we indicate
        // to IPSEC
        //

        if (FC->fc_options) {

            //
            // Allocate the MDL for options too
            //

            NdisAllocateBuffer(&Status,
                               &OptBuffer,
                               BufferPool,
                               FC->fc_options,
                               (uint) FC->fc_optlength);

            if (Status != NDIS_STATUS_SUCCESS) {

                //
                // Couldn't get the needed option buffer.
                //
#if MCAST_BUG_TRACKING
                FC->fc_mtu = __LINE__;
#endif
                FWSendComplete(Packet, Buffer, IP_SUCCESS);
                IPSInfo.ipsi_outdiscards++;
                return;
            }
            // Resulting chain: header -> options -> original data.
            NDIS_BUFFER_LINKAGE(HBuffer) = OptBuffer;
            NDIS_BUFFER_LINKAGE(OptBuffer) = Buffer;

            //
            // update the xsum in the IP header
            //

            FC->fc_pc.pc_common.pc_flags |= PACKET_FLAG_OPTIONS;
            NdisChainBufferAtBack(Packet, OptBuffer);
            // Add the options' sum and fold the carries back into 16 bits.
            csum += xsum(FC->fc_options, (uint) FC->fc_optlength);
            csum = (csum >> 16) + (csum & 0xffff);
            csum += (csum >> 16);

        } else {

            // No options: the data chain follows the header directly.
            NDIS_BUFFER_LINKAGE(HBuffer) = Buffer;
        }

        //
        // Prepare ourselves for sending an ICMP dont frag in case
        // IPSEC bloats beyond the MTU on this interface.
        //
        // SendICMPErr expects the next transport header in the same
        // contiguous buffer as the IPHeader, with or without options.
        // We need to ensure that this is satisfied if in fact we need to
        // fragment on account of IPSEC. So, setup the buffer right here.
        //

        //
        // If this is a zero-payload packet (i.e. just a header), then Buffer
        // is NULL and there is nothing for IPSEC to bloat.  We only have to
        // deal with the don't fragment flag if there is a Buffer.
        //
        if (Buffer && (pSaveIPH->iph_offset & IP_DF_FLAG)) {

            TcpipQueryBuffer(Buffer, &pTpt, &tptLen, NormalPagePriority);

            if (pTpt == NULL) {
#if MCAST_BUG_TRACKING
                FC->fc_mtu = __LINE__;
#endif
                FWSendComplete(Packet, Buffer, IP_SUCCESS);
                IPSInfo.ipsi_outdiscards++;
                return;
            }
            // Append the first ICMP_HEADER_SIZE bytes of the payload to the
            // saved header, fc_optlength bytes past the fixed header.
            // NOTE(review): the copy length is not compared against tptLen;
            // presumably the first data buffer always holds at least
            // ICMP_HEADER_SIZE bytes - confirm.
            RtlCopyMemory(((PUCHAR) (pSaveIPH + 1)) + FC->fc_optlength,
                       pTpt,
                       ICMP_HEADER_SIZE);
        }
        IPH->iph_xsum = ~(ushort) csum;

        // Address used as the source of any ICMP error sent below.
        SrcAddr = FC->fc_if->if_nte->nte_addr;

        // On input ipsecByteCount carries the headroom left under fc_mtu
        // (zero if there is none). The handler receives pointers to both
        // ipsecByteCount and ipsecMTU and may update them.
        ipsecMTU = FC->fc_mtu;
        if ((DataLength + (uint) FC->fc_optlength) < FC->fc_mtu) {
            ipsecByteCount = FC->fc_mtu - (DataLength + (uint) FC->fc_optlength);
        }
        ipsecFlags = IPSEC_FLAG_FORWARD;
        Action = (*IPSecHandlerPtr) ((PUCHAR) IPH,
                                     (PVOID) HBuffer,
                                     FC->fc_if,
                                     Packet,
                                     &ipsecByteCount,
                                     &ipsecMTU,
                                     (PVOID) & newBuf,
                                     &ipsecFlags,
                                     FC->fc_dtype);

        if (Action != eFORWARD) {
#if MCAST_BUG_TRACKING
            FC->fc_mtu = __LINE__;
#endif
            // The packet is completed first; the ICMP error below is built
            // from the saved header copy, not from the packet.
            FWSendComplete(Packet, Buffer, IP_SUCCESS);

            IPSInfo.ipsi_outdiscards++;

            //
            // We can get MTU reduced also when forwarding because in the nested
            // tunneling configuration, the tunnel that starts from this machine
            // can get a ICMP PMTU packet.  We can't reduce the MTU on the interface
            // but we can send back to the sender (which can be a router with yet
            // another tunnel for this packet) a PMTU packet asking him to reduce his
            // MTU even further.  If the sender is an end-station, this PMTU info
            // will eventually propagate back to TCP stack.  If it is a router, the
            // same logic used here will be applied.  The MTU info will thus be
            // relayed all the way back to the original sender (TCP stack).
            // Of course the more common case is that a packet with the added IPSec
            // header exceeds the link MTU.  No matter what is the case, we send the
            // new MTU information back to the sender.
            //
            if (ipsecMTU) {
                SendICMPIPSecErr(SrcAddr,
                                 pSaveIPH,
                                 ICMP_DEST_UNREACH,
                                 FRAG_NEEDED,
                                 net_long((ulong) (ipsecMTU + sizeof(IPHeader))));
                KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TransmitFWPacket: Sent ICMP frag_needed to %lx, from src: %lx\n", pSaveIPH->iph_src, SrcAddr));
            }
            return;

        } else {

            //
            // Use the new buffer chain - IPSEC will restore the old one
            // on send complete
            //

            if (newBuf) {

                NdisReinitializePacket(Packet);
                NdisChainBufferAtBack(Packet, newBuf);
            }
            // Account for what the handler reported in ipsecByteCount
            // (presumably the bytes IPSec added - TODO confirm).
            DataLength += ipsecByteCount;
        }
    }
    //
    // Figure out how to send it. If it's not a broadcast we'll either
    // send it or have it fragmented. If it is a broadcast we'll let our
    // send broadcast routine handle it.
    //

    if (FC->fc_dtype != DEST_BCAST) {

        if ((DataLength + (uint) FC->fc_optlength) <= FC->fc_mtu) {

            // Fits within the MTU: send it as a single packet.
            if (FC->fc_iflink) {

                // A per-link context is only expected on P2MP interfaces.
                ASSERT(FC->fc_if->if_flags & IF_FLAGS_P2MP);
                ArpCtxt = FC->fc_iflink->link_arpctxt;

            }
            //
            // In case of synchronous completion though
            // FreeIPPacket is called, which will not
            // free the FW packet.
            //
            Status = SendIPPacket(FC->fc_if,
                                  FC->fc_nexthop,
                                  Packet,
                                  Buffer,
                                  FC->fc_hbuff,
                                  FC->fc_options,
                                  (uint) FC->fc_optlength,
                                  (BOOLEAN) (IPSecHandlerPtr != NULL),
                                  ArpCtxt,
                                  FALSE);
        } else {

            //
            // Need to fragment this.
            //

            BufferReference *BR = CTEAllocMemN(sizeof(BufferReference), 'GiCT');

            if (BR == (BufferReference *) NULL) {

                //
                // Couldn't get a BufferReference
                //
                // NOTE(review): unlike the earlier failure paths, this one
                // does not increment ipsi_outdiscards - confirm intent.
                //
#if MCAST_BUG_TRACKING
                FC->fc_mtu = __LINE__;
#endif
                FWSendComplete(Packet, Buffer, IP_SUCCESS);
                return;
            }
            // The BufferReference records the original data chain and is
            // attached to the packet context.
            BR->br_buffer = Buffer;
            BR->br_refcount = 0;
            CTEInitLock(&BR->br_lock);
            FC->fc_pc.pc_br = BR;
            BR->br_userbuffer = 0;

            // NOTE(review): pSaveIPH, SrcAddr and OptBuffer are only assigned
            // inside the first IPSecHandlerPtr block above. This second test
            // re-reads the global, so the code assumes its value cannot
            // change between the two reads - confirm.
            if (IPSecHandlerPtr) {

                // Whatever now follows the header buffer (the original
                // chain, or the chain IPSEC substituted) is what gets
                // fragmented.
                Buffer = NDIS_BUFFER_LINKAGE(HBuffer);

                //
                // This is to ensure that options are freed appropriately.
                // In the fragment code, the first fragment inherits the
                // options of the entire packet; but these packets have
                // no IPSEC context, hence cannot be freed appropriately.
                // So, we allocate temporary options here and use these
                // to represent the real options. These are freed when the
                // first fragment is freed and the real options are freed here.
                //

                if (FC->fc_options) {

                    // NOTE(review): declared but never used.
                    PUCHAR tmpOptions;

                    if (newBuf) {

                        //
                        // if a new buffer chain was returned above by IPSEC,
                        // then it is most prob. a tunnel => options were
                        // copied, hence get rid of ours.
                        //

                        NdisFreeBuffer(OptBuffer);
                        CTEFreeMem(FC->fc_options);
                        FC->fc_options = NULL;
                        FC->fc_optlength = 0;

                    } else {

                        // Skip past our options MDL to reach the data, then
                        // release the MDL; fc_options itself is kept and is
                        // passed to IPFragment below.
                        Buffer = NDIS_BUFFER_LINKAGE(OptBuffer);
                        NdisFreeBuffer(OptBuffer);

                    }

                    FC->fc_pc.pc_common.pc_flags &= ~PACKET_FLAG_OPTIONS;
                }
                // Reduce the packet to the bare header buffer again and clear
                // the checksum that was filled in for IPSEC.
                NDIS_BUFFER_LINKAGE(HBuffer) = NULL;
                NdisReinitializePacket(Packet);
                NdisChainBufferAtBack(Packet, HBuffer);
                IPH->iph_xsum = 0;

                //
                // If the DF flag is set, make sure the packet doesn't need
                // fragmentation. If this is the case, send an ICMP error
                // now while we still have the original IP header. The ICMP
                // message includes the MTU so the source host can perform
                // Path MTU discovery.
                //
                // IPSEC headers might have caused this to happen.
                // Send an ICMP to the source so he can adjust his MTU.
                //

                if (pSaveIPH->iph_offset & IP_DF_FLAG) {

                    IPSInfo.ipsi_fragfails++;

                    SendICMPIPSecErr(SrcAddr,
                                     pSaveIPH,
                                     ICMP_DEST_UNREACH,
                                     FRAG_NEEDED,
                                     net_long((ulong) (FC->fc_mtu - ipsecByteCount + sizeof(IPHeader))));

                    KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TransmitFWPacket: Sent ICMP frag_needed to %lx, from src: %lx\n", pSaveIPH->iph_src, SrcAddr));

                    // FreeIPpacket will do header fix up if
                    // original header chain was modified by ipsec/firewall/hdrincl

                    Status = IP_PACKET_TOO_BIG;
                    FreeIPPacket(Packet, TRUE, Status);


                    // Don't want to fall through and complete packet after
                    // we have freed it.
                    return;

                } else {

                    //
                    // DF bit is not set, ok to fragment
                    //

                    if (FC->fc_iflink) {

                        ASSERT(FC->fc_if->if_flags & IF_FLAGS_P2MP);
                        ArpCtxt = FC->fc_iflink->link_arpctxt;

                    }
                    // Fragment against the MTU reduced by ipsecByteCount.
                    Status = IPFragment(FC->fc_if,
                                        FC->fc_mtu - ipsecByteCount,
                                        FC->fc_nexthop,
                                        Packet,
                                        FC->fc_hbuff,
                                        Buffer,
                                        DataLength,
                                        FC->fc_options,
                                        (uint) FC->fc_optlength,
                                        (int *)NULL,
                                        FALSE,
                                        ArpCtxt);

                    //
                    // Fragmentation needed with the DF flag set should have
                    // been handled in IPForward. We don't have the original
                    // header any longer, so silently drop the packet.
                    //

                    ASSERT(Status != IP_PACKET_TOO_BIG);
                }

            } else {

                //
                // No IPSec handler. No need to check for DF bit here
                // because unlike in the IPSec case, we are not messing
                // with the MTUs so the DF check done in IPForwardPkt is
                // valid
                //

                if (FC->fc_iflink) {
                    ASSERT(FC->fc_if->if_flags & IF_FLAGS_P2MP);
                    ArpCtxt = FC->fc_iflink->link_arpctxt;
                }
                // ipsecByteCount is only modified inside the IPSEC block
                // above, so it normally still holds its initial value of 0
                // here and the full fc_mtu is used.
                Status = IPFragment(FC->fc_if,
                                    FC->fc_mtu - ipsecByteCount,
                                    FC->fc_nexthop,
                                    Packet,
                                    FC->fc_hbuff,
                                    Buffer,
                                    DataLength,
                                    FC->fc_options,
                                    (uint) FC->fc_optlength,
                                    (int *)NULL,
                                    FALSE,
                                    ArpCtxt);
                //
                // Fragmentation needed with the DF flag set should have been
                // handled in IPForward. We don't have the original header
                // any longer, so silently drop the packet.
                //

                ASSERT(Status != IP_PACKET_TOO_BIG);
            }
        }

    } else {

        //
        // Dest type is bcast
        //

        Status = SendIPBCast(FC->fc_srcnte,
                             FC->fc_nexthop,
                             Packet,
                             FC->fc_hbuff,
                             Buffer,
                             DataLength,
                             FC->fc_options,
                             (uint) FC->fc_optlength,
                             FC->fc_sos,
                             &FC->fc_index);

    }

    // Anything other than IP_PENDING means the send finished (or failed)
    // synchronously, so the forwarding packet is completed here.
    if (Status != IP_PENDING) {
#if MCAST_BUG_TRACKING
        FC->fc_mtu = __LINE__;
#endif
        FWSendComplete(Packet, Buffer, IP_SUCCESS);
    }
}

//* SendFWPacket - Send a packet that needs to be forwarded.
//
//  Invoked once the data for a forwarded packet has been obtained (or the
//  attempt to obtain it has failed). On success we pick the send queue for
//  the packet - the broadcast queue for broadcast destinations, otherwise
//  the outgoing interface's queue - and transmit right away if the queue is
//  empty and has a free pending slot. Otherwise the packet is appended to
//  the queue, except when an IPSec handler is registered, in which case it
//  is discarded instead of queued.
//
//  Input:  Packet      - Packet to be sent, containing all needed context
//                        info in its ProtocolReserved area (an FWContext).
//          Status      - Status of transfer data.
//          DataLength  - Length in bytes of data to be sent.
//
//  Returns: Nothing.
//
void
SendFWPacket(PNDIS_PACKET Packet, NDIS_STATUS Status, uint DataLength)
{
    FWContext *FwCtx = (FWContext *) Packet->ProtocolReserved;
    RouteSendQ *SendQ;
    CTELockHandle LockHandle;

    // The data could not be obtained: count the discard and release the
    // forwarding packet.
    if (Status != NDIS_STATUS_SUCCESS) {
        IPSInfo.ipsi_outdiscards++;
#if MCAST_BUG_TRACKING
        FwCtx->fc_mtu = __LINE__;
#endif
        FreeFWPacket(Packet);
        return;
    }

    // Select the logical queue this packet belongs on.
    SendQ = IS_BCAST_DEST(FwCtx->fc_dtype)
                ? BCastRSQ
                : &((RouteInterface *) FwCtx->fc_if)->ri_q;

    CTEGetLock(&SendQ->rsq_lock, &LockHandle);

    // Nothing queued ahead of us and a pending slot is free: claim the slot
    // under the lock, then transmit with the lock released.
    if ((SendQ->rsq_qlength == 0) &&
        (SendQ->rsq_pending < SendQ->rsq_maxpending)) {

        SendQ->rsq_pending++;
        CTEFreeLock(&SendQ->rsq_lock, LockHandle);

        TransmitFWPacket(Packet, DataLength);
        return;
    }

    // Can't send now. With an IPSec handler registered packets are never
    // queued (hence the queue is expected to be empty); drop this one.
    if (IPSecHandlerPtr) {
        ASSERT(SendQ->rsq_qlength == 0);
        CTEFreeLock(&SendQ->rsq_lock, LockHandle);
        IPSInfo.ipsi_outdiscards++;
#if MCAST_BUG_TRACKING
        FwCtx->fc_mtu = __LINE__;
#endif
        FreeFWPacket(Packet);
        return;
    }

    // Remember the length for the deferred transmit and link the packet in
    // at the tail of the circular queue (just before the queue head).
    FwCtx->fc_datalength = DataLength;
    FwCtx->fc_q.fq_next = &SendQ->rsq_qh;
    FwCtx->fc_q.fq_prev = SendQ->rsq_qh.fq_prev;
    SendQ->rsq_qh.fq_prev->fq_next = &FwCtx->fc_q;
    SendQ->rsq_qh.fq_prev = &FwCtx->fc_q;
    SendQ->rsq_qlength++;
    CTEFreeLock(&SendQ->rsq_lock, LockHandle);
}

//* GetFWBufferChain - Build a chain of pool buffers describing DataLength
//      bytes and attach it to a forwarding packet.
//
//  Buffers are taken from the large forwarding pool in BUFSIZE_LARGE_POOL
//  sized pieces; a final piece no bigger than BUFSIZE_SMALL_POOL comes from
//  the small pool instead. If any allocation fails, everything allocated so
//  far is returned to the pools and the packet is left untouched.
//
//  Input:  DataLength   - Length in bytes that the buffer chain must be able
//                         to describe. A length of zero yields no chain and
//                         a NULL return.
//          Packet       - Forwarding packet to link the buffer chain into.
//          TailPointer  - Returned pointer to the tail of the buffer chain.
//                         Written only on success.
//
//  Returns: Pointer to the head of the buffer chain on success, NULL
//           on failure.
//
PNDIS_BUFFER
GetFWBufferChain(uint DataLength, PNDIS_PACKET Packet,
                 PNDIS_BUFFER *TailPointer)
{
    KIRQL SavedIrql;
    PNDIS_BUFFER ChainHead = NULL;
    PNDIS_BUFFER ChainTail = NULL;
    PNDIS_BUFFER NewMdl;
    HANDLE Pool;
    PVOID Va;
    uint BytesLeft = DataLength;
    uint ChunkSize;
    FWContext *FwCtx;

    // Run the allocation loop at dispatch level so the AtDpcLevel flavour
    // of the pool allocator can be used for every buffer.
    //
#if !MILLEN
    SavedIrql = KeRaiseIrqlToDpcLevel();
#endif

    while (BytesLeft != 0) {

        // Default to a large-pool buffer covering everything that is left,
        // then cap the size at the large buffer size, or switch to the
        // small pool when the remainder fits in a small buffer.
        //
        ChunkSize = BytesLeft;
        Pool = IpForwardLargePool;
        if (BytesLeft >= BUFSIZE_LARGE_POOL) {
            ChunkSize = BUFSIZE_LARGE_POOL;
        } else if (BytesLeft <= BUFSIZE_SMALL_POOL) {
            Pool = IpForwardSmallPool;
        }

        NewMdl = MdpAllocateAtDpcLevel(Pool, &Va);
        if (!NewMdl) {

            // Out of buffers: unwind the partial chain and give up.
            //
            while (ChainHead) {
                NewMdl = ChainHead;
                ChainHead = ChainHead->Next;
                MdpFree(NewMdl);
            }

            // The code after the loop relies on a NULL head to signal
            // failure.
            //
            ASSERT(!ChainHead);
            break;
        }

        // The allocator is expected to hand back an unlinked buffer.
        //
        ASSERT(!NewMdl->Next);

        NdisAdjustBufferLength(NewMdl, ChunkSize);

        // Append at the tail.
        //
        if (ChainHead) {
            ChainTail->Next = NewMdl;
        } else {
            ChainHead = NewMdl;
        }
        ChainTail = NewMdl;

        BytesLeft -= ChunkSize;
    }

#if !MILLEN
    KeLowerIrql(SavedIrql);
#endif

    if (!ChainHead) {
        return NULL;
    }

    // Success: put the chain in the packet and record both ends in the
    // forwarding context.
    //
    FwCtx = (FWContext *)Packet->ProtocolReserved;

    ASSERT(ChainTail);

    NdisChainBufferAtFront(Packet, ChainHead);
    FwCtx->fc_buffhead = ChainHead;
    FwCtx->fc_bufftail = ChainTail;
    *TailPointer = ChainTail;

    return ChainHead;
}

//* AllocateCopyBuffers - Allocate a buffer chain for a forwarding packet
//      and report how many buffers it contains.
//
//  Thin wrapper around GetFWBufferChain that additionally walks the new
//  chain from head to tail to count its buffers.
//
//  Input:  Packet       - Forwarding packet to link the buffer chain into.
//          DataLength   - Length in bytes that the buffer chain must be able
//                         to describe.
//          Head         - Returned pointer to the head of the buffer chain
//                         (NULL on failure).
//          CountBuffers - Returned count of buffers in the chain. Written
//                         only on success.
//
//  Returns: NDIS_STATUS_SUCCESS or NDIS_STATUS_RESOURCES
//
NDIS_STATUS
AllocateCopyBuffers(PNDIS_PACKET Packet, uint DataLength, PNDIS_BUFFER *Head,
                    uint *CountBuffers)
{
    PNDIS_BUFFER Last, Cursor;
    uint NumBuffers;

    *Head = GetFWBufferChain(DataLength, Packet, &Last);
    if (!*Head) {
        return NDIS_STATUS_RESOURCES;
    }

    // The head counts as one; add one for every link followed until the
    // tail is reached.
    NumBuffers = 1;
    Cursor = *Head;
    while (Cursor != Last) {
        Cursor = Cursor->Next;
        NumBuffers++;
    }

    *CountBuffers = NumBuffers;

    return NDIS_STATUS_SUCCESS;
}

//* GetFWBuffer - Get a list of buffers for forwarding.
//
//  This routine gets a list of buffers for forwarding, and puts the data into
//  it. This may involve calling TransferData, or we may be able to copy
//  directly into them ourselves.
//
//  Input:  SrcNTE          - Pointer to NTE on which packet was received.
//          Packet          - Packet being forwarded, used for TD.
//          Data            - Pointer to data buffer being forwarded.
//          DataLength      - Length in bytes of Data.
//          BufferLength    - Length in bytes available in buffer pointer to
//                            by Data.
//          Offset          - Offset into original data from which to transfer.
//          LContext1, LContext2 - Context values for the link layer.
//
//  Returns: NDIS_STATUS of attempt to get buffer:
//           NDIS_STATUS_RESOURCES if the buffer chain could not be allocated
//           or one of its buffers could not be mapped, NDIS_STATUS_SUCCESS
//           if the data was copied here, otherwise whatever the source
//           interface's transfer-data routine returns.
//
NDIS_STATUS
GetFWBuffer(NetTableEntry * SrcNTE, PNDIS_PACKET Packet, uchar * Data,
            uint DataLength, uint BufferLength, uint Offset,
            NDIS_HANDLE LContext1, uint LContext2)
{
    PNDIS_BUFFER FirstBuffer, CurrentBuffer;
    void *DestPtr;
    Interface *SrcIF;
    uint FirewallMode;

    // Allocate the chain; on success it is already linked into Packet.
    // CurrentBuffer receives the chain's tail and is not needed afterwards.
    FirstBuffer = GetFWBufferChain(DataLength, Packet, &CurrentBuffer);
    if (!FirstBuffer) {
        return NDIS_STATUS_RESOURCES;
    }

#if DBG
    {
        uint TotalBufferSize;
        PNDIS_BUFFER TempBuffer;

        // Sanity check the buffer chain and packet.
        TempBuffer = FirstBuffer;
        TotalBufferSize = 0;
        while (TempBuffer != NULL) {
            TotalBufferSize += NdisBufferLength(TempBuffer);
            TempBuffer = NDIS_BUFFER_LINKAGE(TempBuffer);
        }

        ASSERT(TotalBufferSize == DataLength);
        NdisQueryPacket(Packet, NULL, NULL, NULL, &TotalBufferSize);
        ASSERT(TotalBufferSize == DataLength);
    }
#endif

    // First buffer points to the list of buffers we have. If we can copy the
    // data here, do so, otherwise invoke the link's transfer data routine.
    //    if ((DataLength <= BufferLength) && (SrcNTE->nte_flags & NTE_COPY))
    // change because of firewall

    FirewallMode = ProcessFirewallQ();

    // If DataLength is more than Lookahead size, we may need to
    // call transfer data handler. If IpSec is enabled, make sure that this
    // instance is not from loopback interface.
    //
    // NOTE(review): only the first clause compares DataLength against
    // BufferLength. The firewall, promiscuous and IPSec clauses copy
    // DataLength bytes from Data regardless - presumably the whole datagram
    // is contiguous at Data in those modes; confirm against the receive path.

    if (((DataLength <= BufferLength) && (SrcNTE->nte_flags & NTE_COPY)) ||
        (FirewallMode) || (SrcNTE->nte_if->if_promiscuousmode) ||
        ((SrcNTE != LoopNTE) && IPSecHandlerPtr && ForwardFilterEnabled)) {

        // Fill the chain buffer by buffer. The buffer lengths add up to
        // DataLength (see GetFWBufferChain), so the chain and the remaining
        // count run out together.
        while (DataLength) {
            uint CopyLength;

            TcpipQueryBuffer(FirstBuffer, &DestPtr, &CopyLength, NormalPagePriority);

            if (DestPtr == NULL) {
                // Mapping failed. The chain stays attached to Packet;
                // presumably it is released when the caller frees the
                // packet - TODO confirm.
                return NDIS_STATUS_RESOURCES;
            }

            RtlCopyMemory(DestPtr, Data, CopyLength);
            Data += CopyLength;
            DataLength -= CopyLength;
            FirstBuffer = NDIS_BUFFER_LINKAGE(FirstBuffer);
        }
        return NDIS_STATUS_SUCCESS;
    }
    // We need to call transfer data for this.

    SrcIF = SrcNTE->nte_if;
    return (*(SrcIF->if_transfer)) (SrcIF->if_lcontext, LContext1, LContext2,
                                    Offset, DataLength, Packet, &DataLength);

}

//* GetFWPacket - Get a packet for forwarding.
//
//  Allocates a forwarding packet and resets the per-send state held in its
//  forwarding context and NDIS packet extension, so that nothing is left
//  over from a previous use.
//
//  Input:  ReturnedPacket - Pointer to where to return a packet. Written
//                           only when a packet was obtained.
//
//  Returns: Pointer to IP header buffer of the new packet, or NULL if no
//           packet could be allocated.
//
IPHeader *
GetFWPacket(PNDIS_PACKET *ReturnedPacket)
{
    PNDIS_PACKET NewPacket;
    FWContext *FwCtx;
    PNDIS_PACKET_EXTENSION Ext;

    NewPacket = FwPacketAllocate(0, 0, 0);
    if (!NewPacket) {
        return NULL;
    }

    FwCtx = (FWContext *)NewPacket->ProtocolReserved;

#if MCAST_BUG_TRACKING
    // Tracking builds: break in if the packet is already marked as owned
    // by IP, then take ownership.
    if (FwCtx->fc_pc.pc_common.pc_owner == PACKET_OWNER_IP) {
       DbgPrint("Packet %x",NewPacket);
       DbgBreakPoint();
    }
    FwCtx->fc_pc.pc_common.pc_owner = PACKET_OWNER_IP;
#else
    ASSERT(FwCtx->fc_pc.pc_common.pc_owner == PACKET_OWNER_IP);
#endif

    // A forwarding packet is expected to come with its header buffers and
    // with its context pointing back at the packet.
    ASSERT(FwCtx->fc_hndisbuff);
    ASSERT(FwCtx->fc_hbuff);

    ASSERT(FwCtx->fc_pc.pc_pi == RtPI);
    ASSERT(FwCtx->fc_pc.pc_context == NewPacket);

    // Reset the per-send fields of the packet context.
    FwCtx->fc_pc.pc_common.pc_flags |= PACKET_FLAG_IPHDR;
    FwCtx->fc_pc.pc_common.pc_IpsecCtx = NULL;
    FwCtx->fc_pc.pc_br = NULL;
    FwCtx->fc_pc.pc_ipsec_flags = 0;

    // Clear the per-packet info slots for checksum, IPSec and large send.
    Ext = NDIS_PACKET_EXTENSION_FROM_PACKET(NewPacket);
    Ext->NdisPacketInfo[TcpIpChecksumPacketInfo] = NULL;
    Ext->NdisPacketInfo[IpSecPacketInfo] = NULL;
    Ext->NdisPacketInfo[TcpLargeSendPacketInfo] = NULL;

    // Make sure that fwpackets cancel ids are initialized.
#if !MILLEN
    NDIS_SET_PACKET_CANCEL_ID(NewPacket, NULL);
#endif

    *ReturnedPacket = NewPacket;

    return FwCtx->fc_hbuff;
}

//* IPForwardPkt - Forward a packet.
//
//  The routine called when we need to forward a packet. We check if we're
//  supposed to act as a gateway, and if we are and the incoming packet is a
//  bcast we check and see if we're supposed to forward broadcasts. Assuming
//  we're supposed to forward it, we will process any options. If we find some,
//  we do some validation to make sure everything is good. After that, we look
//  up the next hop. If we can't find one, we'll issue an error.  Then we get
//  a packet and buffers, and send it.
//
//  Input:  SrcNTE          - NTE for net on which we received this.
//          Header          - Pointer to received IPheader.
//          HeaderLength    - Length of header.
//          Data            - Pointer to data to be forwarded.
//          BufferLength    - Length in bytes available in the buffer.
//          LContext1       - lower-layer context supplied upon reception
//          LContext2       - lower-layer context supplied upon reception
//          DestType        - Type of destination.
//          MacHeadersize   - Media header size
//          pNdisBuffer     - Pointer to NDIS_BUFFER describing the frame
//          pClientCnt      - Ndis return variable indicating
//                            if miniport buffer is pended
//          LinkCtxt        - contains per-link context for link-receptions
//
//  Returns: Nothing.
//
void
IPForwardPkt(NetTableEntry *SrcNTE, IPHeader UNALIGNED *Header,
             uint HeaderLength, void *Data, uint BufferLength,
             NDIS_HANDLE LContext1, uint LContext2, uchar DestType,
             uint MacHeaderSize, PNDIS_BUFFER pNdisBuffer, uint *pClientCnt,
             LinkEntry *LinkCtxt)
{
    uchar *Options;
    uchar OptLength;
    OptIndex Index;
    IPAddr DestAddr;                // IP address we're routing towards.
    uchar SendOnSource = DisableSendOnSource;
    IPAddr NextHop;                 // Next hop IP address.
    PNDIS_PACKET Packet;
    FWContext *FWC;
    IPHeader *NewHeader;            // New header.
    NDIS_STATUS Status;
    uint DataLength;
    CTELockHandle TableHandle;
    uchar ErrIndex;
    IPAddr OutAddr;                 // Address of interface we're send out on.
    Interface *IF;                  // Interface we're sending out on.
    uint MTU;
    BOOLEAN HoldPkt = TRUE;
    RouteCacheEntry *FwdRce;
    uint FirewallMode = 0;
    void *ArpCtxt = NULL;
    LinkEntry *Link = NULL;

    DEBUGMSG(DBG_TRACE && DBG_FWD,
        (DTEXT("IPForwardPkt(%x, %x, %d, %x, %d,...)\n"),
        SrcNTE, Header, HeaderLength, Data, BufferLength));

    if (ForwardPackets) {

        DestAddr = Header->iph_dest;

        // If it's a broadcast, see if we can forward it. We won't forward it if broadcast
        // forwarding is turned off, or the destination if the local (all one's) broadcast,
        // or it's a multicast (Class D address). We'll pass through subnet broadcasts in
        // case there's a source route. This would be odd - maybe we should disable this?
        if (IS_BCAST_DEST(DestType)) {

#if IPMCAST
            if (((DestType == DEST_REM_MCAST) ||
                 (DestType == DEST_MCAST)) &&
                (g_dwMcastState == MCAST_STARTED)) {
                BOOLEAN Filter;

                //
                // Dont forward local groups
                //

                if (((Header->iph_dest & 0x00FFFFFF) == 0x000000E0) ||
                    (Header->iph_ttl <= 1) ||
                    !(SrcNTE->nte_if->if_mcastflags & IPMCAST_IF_ENABLED)) {
                    return;
                }
                if (pNdisBuffer) {
                    Filter = IPMForwardAfterRcvPkt(SrcNTE, Header, HeaderLength,
                                                   Data, BufferLength,
                                                   LContext1, LContext2,
                                                   DestType, MacHeaderSize,
                                                   pNdisBuffer, pClientCnt,
                                                   LinkCtxt);
                } else {
                    Filter = IPMForwardAfterRcv(SrcNTE, Header, HeaderLength,
                                                Data, BufferLength, LContext1,
                                                LContext2, DestType, LinkCtxt);
                }
                if (Filter && ForwardFilterEnabled) {
                    NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength);
                }
                return;
            }
#endif

            if (!ForwardBCast) {
                if (DestType > DEST_REMOTE)
                    IPSInfo.ipsi_inaddrerrors++;
                if (ForwardFilterEnabled) {
                    NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength);
                }
                return;
            }
            if ((DestAddr == IP_LOCAL_BCST) ||
                (DestAddr == IP_ZERO_BCST) ||
                (DestType == DEST_SN_BCAST) ||
                CLASSD_ADDR(DestAddr)) {
                if (ForwardFilterEnabled) {
                    NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength);
                }
                return;
            }
            // broad cast
            HoldPkt = FALSE;
        } else {

            FirewallMode = ProcessFirewallQ();

            if ((DestType == DEST_REMOTE) && (!FirewallMode)) {
                NetTableEntry* OrigNTE = SrcNTE;
                SrcNTE = BestNTEForIF(Header->iph_src, SrcNTE->nte_if);
                if (SrcNTE == NULL) {
                    // Something bad happened.
                    if (ForwardFilterEnabled) {
                        NotifyFilterOfDiscard(OrigNTE, Header, Data,
                                              BufferLength);
                    }
                    return;
                }
            }
        }
        // If the TTL would expire, send a message.
        if (Header->iph_ttl <= 1) {
            IPSInfo.ipsi_inhdrerrors++;
            if (!ForwardFilterEnabled ||
                NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength)) {
                SendICMPErr(SrcNTE->nte_addr, Header, ICMP_TIME_EXCEED,
                            TTL_IN_TRANSIT, 0);
            }
            return;
        }
        DataLength = net_short(Header->iph_length) - HeaderLength;

        Index.oi_srtype = NO_SR;    // So we know we don't have a source route.

        Index.oi_srindex = MAX_OPT_SIZE;
        Index.oi_rrindex = MAX_OPT_SIZE;
        Index.oi_tsindex = MAX_OPT_SIZE;

        // Now check for options, and process any we find.
        if (HeaderLength != sizeof(IPHeader)) {
            IPOptInfo OptInfo;

            RtlZeroMemory(&OptInfo, sizeof(OptInfo));

            // Options and possible SR . No buffer ownership opt
            HoldPkt = FALSE;

            OptInfo.ioi_options = (uchar *) (Header + 1);
            OptInfo.ioi_optlength = (uchar) (HeaderLength - sizeof(IPHeader));
            // Validate options, and set up indices.
            if ((ErrIndex = ParseRcvdOptions(&OptInfo, &Index)) < MAX_OPT_SIZE) {
                IPSInfo.ipsi_inhdrerrors++;
                if (!ForwardFilterEnabled ||
                    NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength)) {
                    SendICMPErr(SrcNTE->nte_addr, Header, ICMP_PARAM_PROBLEM,
                                PTR_VALID, ((uint)ErrIndex + sizeof(IPHeader)));
                }
                return;
            }
            // If source routing option was set, and source routing is disabled,
            // then drop the packet.
            if ((OptInfo.ioi_flags & IP_FLAG_SSRR) && DisableIPSourceRouting) {
                KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Pkt dropped - Source routing disabled\n"));
                if (ForwardFilterEnabled) {
                    NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength);
                }
                return;
            }
            Options = CTEAllocMemN(OptInfo.ioi_optlength, 'IiCT');
            if (!Options) {
                IPSInfo.ipsi_outdiscards++;
                return;            // Couldn't get an

            }                    // option buffer, return;

            // Now copy into our buffer.
            RtlCopyMemory(Options, OptInfo.ioi_options, OptLength = OptInfo.ioi_optlength);

            // See if we have a source routing option, and if so we may need to process it. If
            // we have one, and the destination in the header is us, we need to update the
            // route and the header.
            if (Index.oi_srindex != MAX_OPT_SIZE) {
                if (DestType >= DEST_REMOTE) {    // Not for us.

                    if (Index.oi_srtype == IP_OPT_SSRR) {
                        // This packet is strict source routed, but we're not
                        // the destination! We can't continue from here -
                        // perhaps we should send an ICMP, but I'm not sure
                        // which one it would be.
                        CTEFreeMem(Options);
                        IPSInfo.ipsi_inaddrerrors++;
                        if (ForwardFilterEnabled) {
                            NotifyFilterOfDiscard(SrcNTE, Header, Data,
                                                  BufferLength);
                        }
                        return;
                    }
                    Index.oi_srindex = MAX_OPT_SIZE;    // Don't need to update this.

                } else {        // This came here, we need to update the destination address.

                    uchar *SROpt = Options + Index.oi_srindex;
                    uchar Pointer;

                    Pointer = SROpt[IP_OPT_PTR] - 1;    // Index starts from one.

                    // Get the next hop address, and see if it's a broadcast.
                    DestAddr = *(IPAddr UNALIGNED *) & SROpt[Pointer];
                    DestType = GetAddrType(DestAddr);    // Find address type.

                    if ((DestType == DEST_INVALID) ||
                        (DestType == DEST_BCAST) ||
                        (DestType == DEST_SN_BCAST)) {

                        if (!ForwardFilterEnabled ||
                            NotifyFilterOfDiscard(SrcNTE, Header, Data,
                                                  BufferLength)) {
                            SendICMPErr(SrcNTE->nte_addr, Header,
                                        ICMP_DEST_UNREACH, SR_FAILED, 0);
                        }
                        IPSInfo.ipsi_inhdrerrors++;
                        CTEFreeMem(Options);
                        return;
                    }
                    // If we came through here, any sort of broadcast needs
                    // to be sent out the way it came, so update that flag.
                    SendOnSource = EnableSendOnSource;
                }
            }
        } else {                // No options.

            Options = (uchar *) NULL;
            OptLength = 0;
        }

        IPSInfo.ipsi_forwdatagrams++;

        // We've processed the options. Now look up the next hop. If we can't
        // find one, send back an error.
        IF = LookupForwardingNextHop(DestAddr, Header->iph_src, &NextHop, &MTU,
                                     Header->iph_protocol, (uchar *) Data,
                                     BufferLength, &FwdRce, &Link,
                                     Header->iph_src);

        if (IF == NULL) {
            // Couldn't find an outgoing route.
            IPSInfo.ipsi_outnoroutes++;
            if (!ForwardFilterEnabled ||
                NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength)) {
                SendICMPErr(SrcNTE->nte_addr, Header, ICMP_DEST_UNREACH,
                            HOST_UNREACH, 0);
            }
            if (Options)
                CTEFreeMem(Options);
            return;
        } else {
            if (IF->if_flags & IF_FLAGS_P2MP) {
                ASSERT(Link);
                if (Link) {
                    ArpCtxt = Link->link_arpctxt;
                }
            }
        }

        //
        // If the DF flag is set, make sure the packet doesn't need
        // fragmentation. If this is the case, send an ICMP error
        // now while we still have the original IP header. The ICMP
        // message includes the MTU so the source host can perform
        // Path MTU discovery.
        //
        if ((Header->iph_offset & IP_DF_FLAG) &&
            ((DataLength + (uint) OptLength) > MTU)) {
            ASSERT((MTU + sizeof(IPHeader)) >= 68);
            ASSERT((MTU + sizeof(IPHeader)) <= 0xFFFF);

            IPSInfo.ipsi_fragfails++;
            if (!ForwardFilterEnabled ||
                NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength)) {
                SendICMPErr(SrcNTE->nte_addr, Header, ICMP_DEST_UNREACH,
                            FRAG_NEEDED,
                            net_long((ulong)(MTU + sizeof(IPHeader))));
            }

            if (Options)
                CTEFreeMem(Options);
            if (Link) {
                DerefLink(Link);
            }
            DerefIF(IF);
            return;
        }
        if (DataLength > MTU) {

            HoldPkt = FALSE;
        }

        // If there is no ipsec policy, it is safe to
        // reuse the indicated mdl chain.

        if (IPSecStatus) {
            HoldPkt = FALSE;
        }

        // See if we need to filter this packet. If we do, call the filter routine
        // to see if it's OK to forward it.
        if (ForwardFilterEnabled) {
            Interface       *InIF = SrcNTE->nte_if;
            uint            InIFIndex;
            IPAddr          InLinkNextHop;
            IPAddr          OutLinkNextHop;
            FORWARD_ACTION  Action;
            uint            FirewallMode = 0;

            FirewallMode = ProcessFirewallQ();

            if (FirewallMode) {
                InIFIndex = INVALID_IF_INDEX;
                InLinkNextHop = NULL_IP_ADDR;
            } else {
                InIFIndex = InIF->if_index;
                if ((InIF->if_flags & IF_FLAGS_P2MP) && LinkCtxt) {
                    InLinkNextHop = LinkCtxt->link_NextHop;
                } else {
                    InLinkNextHop = NULL_IP_ADDR;
                }
            }

            if ((IF->if_flags & IF_FLAGS_P2MP) && Link) {
                OutLinkNextHop = Link->link_NextHop;
            } else {
                OutLinkNextHop = NULL_IP_ADDR;
            }

            CTEInterlockedIncrementLong(&ForwardFilterRefCount);
            Action = (*ForwardFilterPtr) (Header, Data, BufferLength,
                                          InIFIndex, IF->if_index,
                                          InLinkNextHop, OutLinkNextHop);
            DerefFilterPtr();

            if (Action != FORWARD) {
                IPSInfo.ipsi_outdiscards++;
                if (Options)
                    CTEFreeMem(Options);
                if (Link) {
                    DerefLink(Link);
                }
                DerefIF(IF);

#if FFP_SUPPORT
                // Seed a -ve FFP entry; Packet henceforth dropped in NIC Driver
                TCPTRACE(("Filter dropped a packet, Seeding -ve cache entry\n"));
                IPSetInFFPCaches(Header, Data, BufferLength, FFP_DISCARD_PACKET);
#endif
                return;
            }
        }
        // If we have a strict source route and the next hop is not the one
        // specified, send back an error.
        if (Index.oi_srtype == IP_OPT_SSRR) {
            if (DestAddr != NextHop) {
                IPSInfo.ipsi_outnoroutes++;
                SendICMPErr(SrcNTE->nte_addr, Header, ICMP_DEST_UNREACH,
                            SR_FAILED, 0);
                CTEFreeMem(Options);
                if (Link) {
                    DerefLink(Link);
                }
                DerefIF(IF);
                return;
            }
        }
        // Update the options, if we can and we need to.
        if ((DestType != DEST_BCAST) && Options != NULL) {
            NetTableEntry *OutNTE;

            // Need to find a valid source address for the outgoing interface.
            CTEGetLock(&RouteTableLock.Lock, &TableHandle);
            OutNTE = BestNTEForIF(DestAddr, IF);
            if (OutNTE == NULL) {
                // No NTE for this IF. Something's wrong, just bail out.
                CTEFreeLock(&RouteTableLock.Lock, TableHandle);
                CTEFreeMem(Options);
                if (Link) {
                    DerefLink(Link);
                }
                DerefIF(IF);
                return;
            } else {
                OutAddr = OutNTE->nte_addr;
                CTEFreeLock(&RouteTableLock.Lock, TableHandle);
            }

            ErrIndex = UpdateOptions(Options, &Index,
                                     (IP_LOOPBACK(OutAddr) ? DestAddr : OutAddr));

            if (ErrIndex != MAX_OPT_SIZE) {
                IPSInfo.ipsi_inhdrerrors++;
                SendICMPErr(OutAddr, Header, ICMP_PARAM_PROBLEM, PTR_VALID,
                            ((ulong) ErrIndex + sizeof(IPHeader)));
                CTEFreeMem(Options);
                if (Link) {
                    DerefLink(Link);
                }
                DerefIF(IF);
                return;
            }
        }
        // Send a redirect, if we need to. We'll send a redirect if the packet
        // is going out on the interface it came in on and the next hop address
        // is on the same subnet as the NTE we received it on, and if there
        // are no source route options. We also need to make sure that the
        // source of the datagram is on the I/F we received it on, so we don't
        // send a redirect to another gateway.
        // SendICMPErr will check and not send a redirect if this is a broadcast.
        if ((SrcNTE->nte_if == IF) &&
            IP_ADDR_EQUAL(SrcNTE->nte_addr & SrcNTE->nte_mask,
                          NextHop & SrcNTE->nte_mask) &&
            IP_ADDR_EQUAL(SrcNTE->nte_addr & SrcNTE->nte_mask,
                          Header->iph_src & SrcNTE->nte_mask)) {
            if (Index.oi_srindex == MAX_OPT_SIZE) {

#ifdef REDIRECT_DEBUG

#define PR_IP_ADDR(x) \
    ((x)&0x000000ff),(((x)&0x0000ff00)>>8),(((x)&0x00ff0000)>>16),(((x)&0xff000000)>>24)

                DbgPrint("IP: Sending Redirect. IF = %x SRC_NTE = %x SrcNteIF = %x\n",
                         IF, SrcNTE, SrcNTE->nte_if);

                DbgPrint("IP: SrcNteAddr = %d.%d.%d.%d Mask = %d.%d.%d.%d\n",
                         PR_IP_ADDR(SrcNTE->nte_addr), PR_IP_ADDR(SrcNTE->nte_mask));

                DbgPrint("IP: NextHop = %d.%d.%d.%d Header Src = %d.%d.%d.%d, Dst = %d.%d.%d.%d\n",
                         PR_IP_ADDR(NextHop),
                         PR_IP_ADDR(Header->iph_src),
                         PR_IP_ADDR(Header->iph_dest));

#endif

                SendICMPErr(SrcNTE->nte_addr, Header, ICMP_REDIRECT,
                            REDIRECT_HOST, NextHop);
            }
        }
        // We have the next hop. Now get a forwarding packet.
        if ((NewHeader = GetFWPacket(&Packet)) != NULL) {

            Packet->Private.Flags |= NDIS_PROTOCOL_ID_TCP_IP;
            // Save the packet forwarding context info.
            FWC = (FWContext *) Packet->ProtocolReserved;
            FWC->fc_options = Options;
            FWC->fc_optlength = OptLength;
            FWC->fc_if = IF;
            FWC->fc_mtu = MTU;
            FWC->fc_srcnte = SrcNTE;
            FWC->fc_nexthop = NextHop;
            FWC->fc_sos = SendOnSource;
            FWC->fc_dtype = DestType;
            FWC->fc_index = Index;
            FWC->fc_iflink = Link;

            if (pNdisBuffer && HoldPkt &&
                (NDIS_GET_PACKET_STATUS((PNDIS_PACKET) LContext1) != NDIS_STATUS_RESOURCES)) {
                uint xsum;

                DEBUGMSG(DBG_INFO && DBG_FWD,
                    (DTEXT("IPForwardPkt: bufown %x\n"), pNdisBuffer));

                // Buffer transfer possible!

                //ASSERT(LContext2 <= 8);

                MacHeaderSize += LContext2;

                // remember the original Packet and mac hdr size

                FWC->fc_bufown = LContext1;
                FWC->fc_MacHdrSize = MacHeaderSize;

                //Munge ttl and xsum fields

                Header->iph_ttl = Header->iph_ttl - 1;

                xsum = Header->iph_xsum + 1;

                //add carry
                Header->iph_xsum = (ushort)(xsum + (xsum >> 16));


                // Adjust incoming mdl  pointer and counts

                NdisAdjustBuffer(
                    pNdisBuffer,
                    (PCHAR) NdisBufferVirtualAddress(pNdisBuffer) + MacHeaderSize,
                    NdisBufferLength(pNdisBuffer) - MacHeaderSize);

                //Now link this mdl to the packet

                Packet->Private.Head = pNdisBuffer;
                Packet->Private.Tail = pNdisBuffer;

                Packet->Private.TotalLength = DataLength + HeaderLength;
                Packet->Private.Count = 1;

                // We never loopback the packet
                // except if we are in promiscuous mode
                if (!IF->if_promiscuousmode) {
                    NdisSetPacketFlags(Packet, NDIS_FLAGS_DONT_LOOPBACK);
                }

                Status = (*(IF->if_xmit)) (IF->if_lcontext, &Packet, 1,
                                           NextHop, FwdRce, ArpCtxt);

                DbgNumPktFwd++;

                if (Status != NDIS_STATUS_PENDING) {
                    NdisAdjustBuffer(
                        pNdisBuffer,
                        (PCHAR) NdisBufferVirtualAddress(pNdisBuffer) - MacHeaderSize,
                        NdisBufferLength(pNdisBuffer) + MacHeaderSize);

                    Packet->Private.Head = NULL;
                    Packet->Private.Tail = NULL;

                    FWC->fc_bufown = NULL;
#if MCAST_BUG_TRACKING
                    FWC->fc_mtu = __LINE__;
#endif
                    FreeFWPacket(Packet);
                    *pClientCnt = 0;
                } else {
                    //Okay, the xmit is pending indicate this to ndis.
                    *pClientCnt = 1;
                }

                return;

            } else {
                FWC->fc_bufown = NULL;
            }

            // Fill in the header in the forwarding context

            NewHeader->iph_verlen = Header->iph_verlen;
            NewHeader->iph_tos = Header->iph_tos;
            NewHeader->iph_length = Header->iph_length;
            NewHeader->iph_id = Header->iph_id;
            NewHeader->iph_offset = Header->iph_offset;
            NewHeader->iph_protocol = Header->iph_protocol;
            NewHeader->iph_src = Header->iph_src;

            NewHeader->iph_dest = DestAddr;
            NewHeader->iph_ttl = Header->iph_ttl - 1;
            NewHeader->iph_xsum = 0;

            // Now that we have a packet, go ahead and transfer data the
            // data in if we need to.
            if (DataLength == 0) {
                Status = NDIS_STATUS_SUCCESS;
            } else {
                Status = GetFWBuffer(SrcNTE, Packet, Data, DataLength,
                                     BufferLength, HeaderLength, LContext1,
                                     LContext2);
            }

            // If the status is pending, don't do anything now. Otherwise,
            // if the status is success send the packet.
            if (Status != NDIS_STATUS_PENDING)
                if (Status == NDIS_STATUS_SUCCESS) {

                    if (!IF->if_promiscuousmode) {
                        NdisSetPacketFlags(Packet, NDIS_FLAGS_DONT_LOOPBACK);
                    }
                    SendFWPacket(Packet, Status, DataLength);
                } else {
                    // Some sort of failure. Free the packet.
                    IPSInfo.ipsi_outdiscards++;
#if MCAST_BUG_TRACKING
                    FWC->fc_mtu = __LINE__;
#endif
                    FreeFWPacket(Packet);
                }
        } else {                // Couldn't get a packet, so drop this.

            DEBUGMSG(DBG_ERROR && DBG_FWD,
                (DTEXT("IPForwardPkt: failed to get a forwarding packet!\n")));

            IPSInfo.ipsi_outdiscards++;
            if (Options)
                CTEFreeMem(Options);
            if (Link) {
                DerefLink(Link);
            }
            DerefIF(IF);
        }
    } else { // Forward called, but forwarding turned off.

        DEBUGMSG(DBG_WARN && DBG_FWD,
            (DTEXT("IPForwardPkt: Forwarding called but is actually OFF.\n")));

        if (DestType != DEST_BCAST && DestType != DEST_SN_BCAST) {
            // No need to go through here for strictly broadcast packets,
            // although we want to bump the counters for remote bcast stuff.
            IPSInfo.ipsi_inaddrerrors++;

            if (!IS_BCAST_DEST(DestType)) {
                if (DestType == DEST_LOCAL)        // Called when local, must be SR.

                    SendICMPErr(SrcNTE->nte_addr, Header,
                                ICMP_DEST_UNREACH, SR_FAILED, 0);
            }
        }
    }

}

//* AddNTERoutes - Add the routes for an NTE.
//
//  Called during initialization or during DHCP address assignment to add
//  routes. The routes are installed in this order:
//
//    - a host route to the NTE's own address, through the loopback
//      interface;
//    - a host route for the all-subnets broadcast address, added with the
//      route table lock held; route-change notifications are issued after
//      the lock is released if the add filled in an interface index;
//    - the class D (multicast) route, only when IGMPLevel is non-zero;
//    - the route to the attached subnet, unless the NTE mask is HOST_MASK.
//
//  Processing stops at the first add that fails; routes that were added
//  before the failure are left in place.
//
//  Input:  NTE     - NTE for which to add routes.
//
//  Returns: TRUE if they were all added, FALSE if not.
//
uint
AddNTERoutes(NetTableEntry * NTE)
{
    Interface           *OwnerIF = NTE->nte_if;
    IPRouteNotifyOutput NotifyInfo = {0};
    CTELockHandle       LockHandle;
    IP_STATUS           AddStatus;
    IPMask              NetMask;
    IPMask              SubnetMask;
    IPAddr              SNBCastAddr;
    IPAddr              SubnetAddr;

    IF_IPDBG(IP_DEBUG_ADDRESS)
        KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                   " AddNTE: Adding host route for %x\n", NTE->nte_addr));

    // Host route to our own address; it points at the loopback interface.
    AddStatus = AddRoute(NTE->nte_addr, HOST_MASK, IPADDR_LOCAL,
                         LoopNTE->nte_if, LOOPBACK_MSS, OwnerIF->if_metric,
                         IRE_PROTO_LOCAL, ATYPE_OVERRIDE, 0, 0);
    if (AddStatus != IP_SUCCESS) {
        return FALSE;
    }

    NetMask = IPNetMask(NTE->nte_addr);

    // All-subnets broadcast route. SendIPBCast special-cases this address,
    // so the interface the route lands on is not important.
    CTEGetLock(&RouteTableLock.Lock, &LockHandle);

    SNBCastAddr = (NTE->nte_addr & NetMask) | (OwnerIF->if_bcast & ~NetMask);

    IF_IPDBG(IP_DEBUG_ADDRESS)
        KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                   " AddNTE: SNBCast address %x\n", SNBCastAddr));

    AddStatus = LockedAddRoute(SNBCastAddr, HOST_MASK, IPADDR_LOCAL, OwnerIF,
                               NTE->nte_mss, OwnerIF->if_metric,
                               IRE_PROTO_LOCAL, ATYPE_PERM, 0, FALSE,
                               &NotifyInfo);

    CTEFreeLock(&RouteTableLock.Lock, LockHandle);

    if (AddStatus != IP_SUCCESS) {
        return FALSE;
    }

    // Notify only when the add recorded an interface index in its output.
    if (NotifyInfo.irno_ifindex) {
        RtChangeNotifyEx(&NotifyInfo);
        RtChangeNotify(&NotifyInfo);
    }

    // Multicast (class D) route, needed only when IGMP is in use.
    if (IGMPLevel != 0) {

        IF_IPDBG(IP_DEBUG_ADDRESS)
            KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                       " AddNTE: Adding classD address\n"));

        AddStatus = AddRoute(MCAST_DEST, CLASSD_MASK, IPADDR_LOCAL,
                             NTE->nte_if, NTE->nte_mss, OwnerIF->if_metric,
                             IRE_PROTO_LOCAL, ATYPE_PERM, 0, 0);
        if (AddStatus != IP_SUCCESS) {
            return FALSE;
        }
    }

    // A host-mask NTE has no attached subnet to route to.
    if (NTE->nte_mask == HOST_MASK) {
        return TRUE;
    }

    // And finally the route to the subnet.
    SubnetMask = NTE->nte_mask;
    SubnetAddr = NTE->nte_addr & SubnetMask;

    IF_IPDBG(IP_DEBUG_ADDRESS)
        KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
                   " AddNTE: Adding subnet route %x\n",
                   SubnetAddr));

    AddStatus = AddRoute(SubnetAddr, SubnetMask, IPADDR_LOCAL, NTE->nte_if,
                         NTE->nte_mss, OwnerIF->if_metric, IRE_PROTO_LOCAL,
                         ATYPE_PERM, 0, 0);
    if (AddStatus != IP_SUCCESS) {
        return FALSE;
    }

    return TRUE;
}

//* DelNTERoutes - Delete the routes for an NTE.
//
//  Called when we receive media disconnect indication. Removes the routes
//  associated with one NTE:
//
//    - the host route to the NTE's address through the loopback interface;
//    - the class D (multicast) route, when IGMPLevel is non-zero, the
//      interface is not being deleted, and the interface's NTE count is 0;
//    - the subnet route, unless the NTE mask is HOST_MASK or another valid
//      NTE on the same interface covers the same subnet;
//    - the all-subnets broadcast host route, unless the interface is being
//      deleted or another valid NTE on the interface yields the same
//      broadcast address.
//
//  A failed deletion does not stop processing; the remaining routes are
//  still attempted and the failure is reflected in the return value.
//
//  Input:  NTE                     - NTE for which to delete routes.
//
//  Returns: TRUE if they were all deleted, FALSE if not.
//
uint
DelNTERoutes(NetTableEntry * NTE)
{
    uint retVal = TRUE;

    // First, delete the route to the address itself. This is a route through
    // the loopback interface.
    if (DeleteRoute(NTE->nte_addr, HOST_MASK, IPADDR_LOCAL, LoopNTE->nte_if,
                    0) != IP_SUCCESS)
        retVal = FALSE;

    // If we're doing IGMP, delete the route to the multicast address.
    if (IGMPLevel != 0) {
        if (!(NTE->nte_flags & NTE_IF_DELETING) &&
            (NTE->nte_if->if_ntecount == 0)) {    // this is the last NTE on this if

            if (DeleteRoute(MCAST_DEST, CLASSD_MASK, IPADDR_LOCAL,
                            NTE->nte_if, 0) != IP_SUCCESS)
                retVal = FALSE;
        }
    }

    if (NTE->nte_mask != HOST_MASK) {
        // And finally the route to the subnet, but only if there are no
        // other valid NTEs on this IF that share the same subnet route.
        NetTableEntry *tmpNTE = NTE->nte_if->if_nte;

        while (tmpNTE) {

            if ((tmpNTE != NTE) &&
                (tmpNTE->nte_flags & NTE_VALID) &&
                ((tmpNTE->nte_addr & tmpNTE->nte_mask) ==
                 (NTE->nte_addr & NTE->nte_mask))) {
                break;
            }
            tmpNTE = tmpNTE->nte_ifnext;
        }

        // tmpNTE is NULL only when the walk found no other user of the
        // subnet route.
        if (!tmpNTE) {
            IPMask SNMask = NTE->nte_mask;

            if (DeleteRoute(NTE->nte_addr & SNMask, SNMask, IPADDR_LOCAL,
                            NTE->nte_if, 0) != IP_SUCCESS)
                retVal = FALSE;
        }
    }

    if (!(NTE->nte_flags & NTE_IF_DELETING)) {
        Interface *IF = NTE->nte_if;
        NetTableEntry *tmpNTE = IF->if_nte;
        IPMask Mask;
        IPAddr AllSNBCast;

        Mask = IPNetMask(NTE->nte_addr);

        AllSNBCast = (NTE->nte_addr & Mask) | (IF->if_bcast & ~Mask);

        // Look for another valid NTE on this IF whose all-subnets broadcast
        // address matches ours; if one exists the route must stay.
        while (tmpNTE) {
            IPMask tmpMask;
            IPAddr tmpAllSNBCast;

            tmpMask = IPNetMask(tmpNTE->nte_addr);

            tmpAllSNBCast = (tmpNTE->nte_addr & tmpMask) |
                            (IF->if_bcast & ~tmpMask);

            if ((tmpNTE != NTE) &&
                (tmpNTE->nte_flags & NTE_VALID) &&
                IP_ADDR_EQUAL(AllSNBCast, tmpAllSNBCast)) {
                break;
            }
            tmpNTE = tmpNTE->nte_ifnext;
        }

        if (!tmpNTE) {
            // Delete the route for the all-subnet's broadcast.
            if (DeleteRoute(AllSNBCast, HOST_MASK, IPADDR_LOCAL, IF,
                            0) != IP_SUCCESS)
                retVal = FALSE;
        }
    }

    return retVal;
}

//* DelIFRoutes - Delete the routes for an interface.
//
//  Called when we receive media disconnect indication. Walks every chain
//  of NewNetTableList and calls DelNTERoutes for each valid NTE that
//  belongs to the given interface. The walk is abandoned at the first NTE
//  for which DelNTERoutes reports failure.
//
//  Input:  IF      - IF for which to delete routes.
//
//  Returns: TRUE if they were all deleted, FALSE if not.
//
uint
DelIFRoutes(Interface * IF)
{
    NetTableEntry *CurNTE;
    uint Bucket = 0;

    while (Bucket < NET_TABLE_SIZE) {
        for (CurNTE = NewNetTableList[Bucket];
             CurNTE != NULL;
             CurNTE = CurNTE->nte_next) {

            // Skip entries that are invalid or live on another interface.
            if (!(CurNTE->nte_flags & NTE_VALID) || CurNTE->nte_if != IF) {
                continue;
            }

            // This guy is on the interface, and needs to be deleted.
            if (!DelNTERoutes(CurNTE)) {
                return FALSE;
            }
        }
        Bucket++;
    }

    return TRUE;
}

//* AddIFRoutes - Add the routes for an interface.
//
//  Walks every chain of NewNetTableList and calls AddNTERoutes for each
//  valid NTE that belongs to the given interface. The walk is abandoned at
//  the first NTE for which AddNTERoutes reports failure.
//
//  NOTE(review): the previous header said this is called on a media
//  disconnect indication, which reads like a copy of the DelIFRoutes
//  header; presumably this is the media-connect counterpart - confirm
//  against the callers.
//
//  Input:  IF  - IF for which to Add routes.
//
//  Returns: TRUE if they were all Added, FALSE if not.
//
uint
AddIFRoutes(Interface * IF)
{
    uint Slot;

    for (Slot = 0; Slot < NET_TABLE_SIZE; Slot++) {
        NetTableEntry *Entry = NewNetTableList[Slot];

        while (Entry != NULL) {
            uint OnThisIF = (Entry->nte_flags & NTE_VALID) &&
                            Entry->nte_if == IF;

            // This guy is on the interface, and needs to be added.
            if (OnThisIF && !AddNTERoutes(Entry)) {
                return FALSE;
            }

            Entry = Entry->nte_next;
        }
    }

    return TRUE;
}

#pragma BEGIN_INIT

// Smallest nte_mss among the NTEs passed to InitNTERouting so far (the
// loopback NTE is skipped there). Starts at 0xffff and is used as the MSS
// of the local broadcast route that InitNTERouting adds.
uint BCastMinMTU = 0xffff;

//* InitNTERouting -  do per NTE route initialization.
//
//  Called when we need to initialize per-NTE routing. Nothing is done for
//  the loopback NTE. For any other NTE:
//
//    - BCastMinMTU is lowered to the NTE's MSS if that is smaller;
//    - a host route for the interface's broadcast address is added, using
//      BCastMinMTU as its MSS (the result of this add is not checked);
//    - if the NTE is valid, AddNTERoutes is called, and then one default
//      route is added per entry in GWList. A gateway equal to the NTE's
//      own address is replaced by IPADDR_LOCAL, and a zero metric is
//      replaced by the interface metric. Failures adding default routes
//      are not checked here, although they should probably be logged.
//
//  Input:  NTE             - NTE for which routing is to be initialized.
//          NumGWs          - Number of default gateways to add.
//          GWList          - List of default gateways; each entry is
//                            passed through net_long before use.
//          GWMetricList    - the metric for each gateway.
//
//  Returns: TRUE if we succeed, FALSE if AddNTERoutes fails.
//
uint
InitNTERouting(NetTableEntry * NTE, uint NumGWs, IPAddr * GWList,
               uint * GWMetricList)
{
    Interface *OwnerIF;
    uint GWIndex;

    if (NTE == LoopNTE) {
        return TRUE;
    }

    BCastMinMTU = MIN(BCastMinMTU, NTE->nte_mss);

    // Route for local bcast.
    OwnerIF = NTE->nte_if;
    AddRoute(OwnerIF->if_bcast, HOST_MASK, IPADDR_LOCAL, OwnerIF,
             BCastMinMTU, 1, IRE_PROTO_LOCAL, ATYPE_OVERRIDE,
             0, 0);

    // An NTE without a valid address gets no further routes.
    if (!(NTE->nte_flags & NTE_VALID)) {
        return TRUE;
    }

    if (!AddNTERoutes(NTE)) {
        return FALSE;
    }

    // Now add the default routes that are present on this net.
    GWIndex = 0;
    while (GWIndex < NumGWs) {
        IPAddr Gateway = net_long(GWList[GWIndex]);
        uint Metric = GWMetricList[GWIndex];

        if (IP_ADDR_EQUAL(Gateway, NTE->nte_addr)) {
            Gateway = IPADDR_LOCAL;
        }

        // A zero metric means "use the interface's metric".
        if (!Metric) {
            Metric = OwnerIF->if_metric;
        }

        AddRoute(NULL_IP_ADDR, DEFAULT_MASK,
                 Gateway, NTE->nte_if, NTE->nte_mss,
                 Metric,
                 IRE_PROTO_NETMGMT, ATYPE_OVERRIDE, 0, 0);

        GWIndex++;
    }

    return TRUE;
}

//* EnableRouter - enables forwarding.
//
//  This routine configures this node to enable packet-forwarding by
//  setting RouterConfigured and ForwardPackets to TRUE. ForwardBCast is
//  set to FALSE, so broadcast forwarding stays off.
//  It must be called with the route table lock held.
//
//  Entry: nothing.
//
//  Returns: nothing.
//
void
EnableRouter()
{
    RouterConfigured = TRUE;
    ForwardBCast = FALSE;       // broadcast forwarding is not turned on here
    ForwardPackets = TRUE;
}

//* DisableRouter - disables forwarding.
//
//  This routine configures this node to disable packet-forwarding by
//  setting RouterConfigured, ForwardBCast and ForwardPackets to FALSE.
//  It must be called with the route table lock held.
//
//  Entry: nothing.
//
//  Returns: nothing.
//
void
DisableRouter()
{
    RouterConfigured = FALSE;
    ForwardBCast = FALSE;
    ForwardPackets = FALSE;
}

//* IPEnableRouterWithRefCount - acquires or releases a reference to forwarding
//
//  This routine adjusts IPEnableRouterRefCount, the reference-count on
//  forwarding functionality. Acquiring the first reference calls
//  EnableRouter if the router is not already configured; releasing the
//  last reference calls DisableRouter if the router is configured.
//  It must be called with the route table lock held.
//
//  Entry:  Enable      - non-zero to acquire a reference, zero to release
//                        one.
//
//  Return: the number of remaining references.
//
int
IPEnableRouterWithRefCount(LOGICAL Enable)
{
    if (!Enable) {
        // Release path: drop one reference, turn forwarding off on the last.
        IPEnableRouterRefCount--;
        if (IPEnableRouterRefCount == 0 && RouterConfigured) {
            DisableRouter();
        }
        return IPEnableRouterRefCount;
    }

    // Acquire path: take one reference, turn forwarding on for the first.
    IPEnableRouterRefCount++;
    if (IPEnableRouterRefCount == 1 && !RouterConfigured) {
        EnableRouter();
    }
    return IPEnableRouterRefCount;
}

//* InitRouting - Initialize our routing table.
//
//  Called during initialization to initialize the routing table.
//
//  Entry: Nothing.
//
//  Returns: True if we succeeded, False if we didn't.
//
int
InitRouting(IPConfigInfo * ci)
{
    UINT initStatus;
    ULONG initFlags;

    CTEInitLock(&RouteTableLock.Lock);
    CTEInitBlockStruc(&ForwardFilterBlock);

    DefGWConfigured = 0;
    DefGWActive = 0;

    // Build the dummy interface: start from a zeroed structure, then plug in
    // the Dummy* handlers and fixed link parameters.
    RtlZeroMemory(&DummyInterface, sizeof(DummyInterface));
    DummyInterface.ri_if.if_xmit = DummyXmit;
    DummyInterface.ri_if.if_transfer = DummyXfer;
    DummyInterface.ri_if.if_close = DummyClose;
    DummyInterface.ri_if.if_invalidate = DummyInvalidate;
    DummyInterface.ri_if.if_qinfo = DummyQInfo;
    DummyInterface.ri_if.if_setinfo = DummySetInfo;
    DummyInterface.ri_if.if_getelist = DummyGetEList;
    DummyInterface.ri_if.if_addaddr = DummyAddAddr;
    DummyInterface.ri_if.if_deladdr = DummyDelAddr;
    DummyInterface.ri_if.if_dondisreq = DummyDoNdisReq;
    DummyInterface.ri_if.if_bcast = IP_LOCAL_BCST;
    DummyInterface.ri_if.if_speed = 10000000;
    DummyInterface.ri_if.if_mtu = 1500;
    DummyInterface.ri_if.if_index = INVALID_IF_INDEX;
    LOCKED_REFERENCE_IF(&DummyInterface.ri_if);
    DummyInterface.ri_if.if_pnpcontext = 0;

    // Create the route table, using the fast trie only if it was requested
    // in the configuration.
    initFlags = ci->ici_fastroutelookup ? TFLAG_FAST_TRIE_ENABLED : 0;
    initStatus = InitRouteTable(initFlags,
                                ci->ici_fastlookuplevels,
                                ci->ici_maxfastlookupmemory,
                                ci->ici_maxnormlookupmemory);
    if (initStatus != STATUS_SUCCESS) {
        TCPTRACE(("Init Route Table Failed: %08x\n", initStatus));
        return FALSE;
    }

    // Add the permanent route for the loopback net through the loopback
    // interface. This is the only route added here.
    AddRoute(LOOPBACK_ADDR & CLASSA_MASK, CLASSA_MASK, IPADDR_LOCAL,
             LoopNTE->nte_if, LOOPBACK_MSS, 1, IRE_PROTO_LOCAL, ATYPE_PERM,
             0, 0);

    // Turn on forwarding if the configuration asks for it. Only the low
    // byte of ici_gateway is examined.
    if ((uchar) ci->ici_gateway) {
        EnableRouter();
    }

    // Reset the periodic-timer bookkeeping and start the route timer.
    CTEInitTimer(&IPRouteTimer);
    RouteTimerTicks = 0;
#if FFP_SUPPORT
    FFPFlushRequired = FALSE;
#endif
    FlushIFTimerTicks = 0;

    CTEStartTimer(&IPRouteTimer, IP_ROUTE_TIMEOUT, IPRouteTimeout, NULL);
    return TRUE;

}

// FwPacketAllocate - allocate and prepare one forwarding packet.
//
//  Takes a packet from the forwarding packet pool and an IP header buffer
//  from the IP header pool, then initializes the packet's forwarding
//  context. PoolType, NumberOfBytes and Tag are not used by this routine.
//
//  Returns: the packet, or NULL if either allocation failed.
//
PVOID
NTAPI
FwPacketAllocate (
    IN POOL_TYPE PoolType,
    IN SIZE_T NumberOfBytes,
    IN ULONG Tag
    )
{
    NDIS_STATUS AllocStatus;
    PNDIS_PACKET FwPacket;
    PNDIS_BUFFER HeaderBuffer;
    IPHeader *HeaderVa;
    FWContext *Context;

    // Get a packet from our forwarding packet pool.
    //
    NdisAllocatePacket(&AllocStatus, &FwPacket, IpForwardPacketPool);
    if (AllocStatus != NDIS_STATUS_SUCCESS) {
        return NULL;
    }

    // Get an IP header buffer from our IP header pool. Without one the
    // packet is of no use, so give it back.
    //
    HeaderBuffer = MdpAllocate(IpHeaderPool, &HeaderVa);
    if (!HeaderBuffer) {
        NdisFreePacket(FwPacket);
        return NULL;
    }

    // Initialize the forwarding context area of the packet.
    //
    Context = (FWContext *)FwPacket->ProtocolReserved;
    RtlZeroMemory(Context, sizeof(FWContext));
    Context->fc_hndisbuff = HeaderBuffer;
    Context->fc_hbuff = HeaderVa;
    Context->fc_pc.pc_common.pc_flags = PACKET_FLAG_FW | PACKET_FLAG_IPHDR;

#if MCAST_BUG_TRACKING
    Context->fc_pc.pc_common.pc_owner = 0;
#else
    Context->fc_pc.pc_common.pc_owner = PACKET_OWNER_IP;
#endif
    Context->fc_pc.pc_pi = RtPI;
    Context->fc_pc.pc_context = FwPacket;

    return FwPacket;
}


// FwPacketFree - release a forwarding packet.
//
//  Frees the IP header buffer recorded in the packet's forwarding context,
//  if there is one, and then frees the packet itself.
//
//  Input:  Buffer  - the packet to free.
//
VOID
NTAPI
FwPacketFree (
    IN PVOID Buffer
    )
{
    PNDIS_PACKET FwPacket = (PNDIS_PACKET)Buffer;
    PNDIS_BUFFER HeaderBuffer;

    // Return any IP header to its pool.
    //
    HeaderBuffer = ((FWContext *)FwPacket->ProtocolReserved)->fc_hndisbuff;
    if (HeaderBuffer != NULL) {
        MdpFree(HeaderBuffer);
    }

    NdisFreePacket(FwPacket);
}


//* InitForwardingPools - Initialize the packet and buffer pools used
//      for forwarding operations.
//
//  Returns: TRUE if the operations succeeded.
//
BOOLEAN InitForwardingPools()
{
    NDIS_STATUS PoolStatus;

    // The "large" forwarding buffer pool comes first; nothing needs to be
    // undone if it cannot be created.
    //
    IpForwardLargePool = MdpCreatePool(BUFSIZE_LARGE_POOL, 'lfCT');
    if (IpForwardLargePool == NULL) {
        return FALSE;
    }

    // Next the "small" forwarding buffer pool.
    //
    IpForwardSmallPool = MdpCreatePool(BUFSIZE_SMALL_POOL, 'sfCT');
    if (IpForwardSmallPool == NULL) {
        goto DestroyLargePool;
    }

    // Finally the forwarding packet pool, with room for an FWContext in
    // each packet's protocol-reserved area.
    //
    NdisAllocatePacketPoolEx(&PoolStatus, &IpForwardPacketPool,
                             PACKET_POOL_SIZE, 0, sizeof(FWContext));
    if (PoolStatus != NDIS_STATUS_SUCCESS) {
        goto DestroySmallPool;
    }

    NdisSetPacketPoolProtocolId(IpForwardPacketPool, NDIS_PROTOCOL_ID_TCP_IP);

    return TRUE;

    // Unwind in reverse order of creation, clearing each global handle.
    //
DestroySmallPool:
    MdpDestroyPool(IpForwardSmallPool);
    IpForwardSmallPool = NULL;

DestroyLargePool:
    MdpDestroyPool(IpForwardLargePool);
    IpForwardLargePool = NULL;

    return FALSE;
}

//* InitGateway - Initialize our gateway functionality.
//
//  Called during init. time to initialize our gateway functionality. The
//  resources needed for forwarding are allocated and initialized
//  unconditionally, whether or not we're currently configured as a router.
//
//  Input:  ci  - Config info.
//
//  Returns: TRUE if we succeed, FALSE if we don't.
//
uint
InitGateway(IPConfigInfo * ci)
{
    RouteInterface *RtIF;
    NetTableEntry *NTE;
    uint i;

    // Allocate and initialize the resources needed for forwarding. This is
    // done unconditionally, not only when we're configured as a router.
    BCastRSQ = NULL;

    RtPI = CTEAllocMemNBoot(sizeof(ProtInfo), 'JiCT');
    if (RtPI == (ProtInfo *) NULL)
        goto failure;

    RtPI->pi_xmitdone = FWSendComplete;

    // Initialize the send queue of the interface behind every NTE: an empty
    // circular queue, not running, with nothing pending.
    for (i = 0; i < NET_TABLE_SIZE; i++) {
        NetTableEntry *NetTableList = NewNetTableList[i];
        for (NTE = NetTableList; NTE != NULL; NTE = NTE->nte_next) {
            RtIF = (RouteInterface *) NTE->nte_if;

            RtIF->ri_q.rsq_qh.fq_next = &RtIF->ri_q.rsq_qh;
            RtIF->ri_q.rsq_qh.fq_prev = &RtIF->ri_q.rsq_qh;
            RtIF->ri_q.rsq_running = FALSE;
            RtIF->ri_q.rsq_pending = 0;
            RtIF->ri_q.rsq_qlength = 0;
            CTEInitLock(&RtIF->ri_q.rsq_lock);
        }
    }

    // Allocate and initialize the broadcast send queue the same way.
    BCastRSQ = CTEAllocMemNBoot(sizeof(RouteSendQ), 'KiCT');

    if (BCastRSQ == (RouteSendQ *) NULL)
        goto failure;

    BCastRSQ->rsq_qh.fq_next = &BCastRSQ->rsq_qh;
    BCastRSQ->rsq_qh.fq_prev = &BCastRSQ->rsq_qh;
    BCastRSQ->rsq_pending = 0;
    BCastRSQ->rsq_maxpending = DEFAULT_MAX_PENDING;
    BCastRSQ->rsq_qlength = 0;
    BCastRSQ->rsq_running = FALSE;
    CTEInitLock(&BCastRSQ->rsq_lock);

    RtIF = (RouteInterface *) &LoopInterface;
    RtIF->ri_q.rsq_maxpending = DEFAULT_MAX_PENDING;

    if (!InitForwardingPools()) {
        goto failure;
    }

    return TRUE;

  failure:
    // Release whatever was allocated. RtPI and BCastRSQ are globals, so
    // clear them after freeing to avoid leaving dangling pointers behind.
    if (RtPI != NULL) {
        CTEFreeMem(RtPI);
        RtPI = NULL;
    }
    if (BCastRSQ != NULL) {
        CTEFreeMem(BCastRSQ);
        BCastRSQ = NULL;
    }

    // Forwarding cannot work without these resources, so switch it off.
    ForwardBCast = FALSE;
    ForwardPackets = FALSE;
    RouterConfigured = FALSE;
    IPEnableRouterRefCount = (ci->ici_gateway ? 1 : 0);
    return FALSE;

}

// GetIFAndLink - report the interface index and next hop behind an RCE.
//
//  Input:  Rce     - route cache entry to examine (may be NULL).
//          IFIndex - receives the index of the route's interface.
//          NextHop - receives the link's next hop if the route has a link,
//                    NULL_IP_ADDR otherwise.
//
//  Returns: IP_SUCCESS if the RCE is valid, its link is not deleted, and it
//           has a route with an interface; IP_GENERAL_FAILURE otherwise.
//           The outputs are written only on success.
//
NTSTATUS
GetIFAndLink(void *Rce, UINT * IFIndex, IPAddr * NextHop)
{
    RouteCacheEntry *CacheEntry = (RouteCacheEntry *) Rce;
    RouteTableEntry *Route;
    Interface *OutIF;
    NTSTATUS Result = IP_GENERAL_FAILURE;
    KIRQL OldIrql;

    CTEGetLock(&RouteTableLock.Lock, &OldIrql);

    // Only a valid RCE whose link has not been deleted can be trusted to
    // name a route.
    if (CacheEntry == NULL ||
        !(CacheEntry->rce_flags & RCE_VALID) ||
        (CacheEntry->rce_flags & RCE_LINK_DELETED)) {
        goto Done;
    }

    Route = CacheEntry->rce_rte;
    if (Route == NULL) {
        goto Done;
    }

    OutIF = IF_FROM_RTE(Route);
    if (OutIF == NULL) {
        goto Done;
    }

    *IFIndex = OutIF->if_index;
    if (Route->rte_link) {
        ASSERT(OutIF->if_flags & IF_FLAGS_P2MP);
        *NextHop = Route->rte_link->link_NextHop;
    } else {
        *NextHop = NULL_IP_ADDR;
    }
    Result = IP_SUCCESS;

Done:
    CTEFreeLock(&RouteTableLock.Lock, OldIrql);

    return Result;
}

#pragma END_INIT