1602 lines
51 KiB
C
1602 lines
51 KiB
C
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
|
|
//
|
|
// Copyright (c) 1985-2000 Microsoft Corporation
|
|
//
|
|
// This file is part of the Microsoft Research IPv6 Network Protocol Stack.
|
|
// You should have received a copy of the Microsoft End-User License Agreement
|
|
// for this software along with this release; see the file "license.txt".
|
|
// If not, please see http://www.research.microsoft.com/msripv6/license.htm,
|
|
// or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
|
|
//
|
|
// Abstract:
|
|
//
|
|
// Code for TCP Control Block management.
|
|
//
|
|
|
|
|
|
#include "oscfg.h"
|
|
#include "ndis.h"
|
|
#include "ip6imp.h"
|
|
#include "ip6def.h"
|
|
#include "tdi.h"
|
|
#include "tdint.h"
|
|
#include "tdistat.h"
|
|
#include "queue.h"
|
|
#include "transprt.h"
|
|
#include "tcp.h"
|
|
#include "tcb.h"
|
|
#include "tcpconn.h"
|
|
#include "tcpsend.h"
|
|
#include "tcprcv.h"
|
|
#include "info.h"
|
|
#include "tcpcfg.h"
|
|
#include "tcpdeliv.h"
|
|
#include "route.h"
|
|
|
|
KSPIN_LOCK TCBTableLock;
|
|
|
|
uint TCPTime;
|
|
uint TCBWalkCount;
|
|
|
|
TCB **TCBTable;
|
|
|
|
TCB *LastTCB;
|
|
|
|
TCB *PendingFreeList;
|
|
|
|
SLIST_HEADER FreeTCBList;
|
|
|
|
KSPIN_LOCK FreeTCBListLock; // Lock to protect TCB free list.
|
|
|
|
extern KSPIN_LOCK AddrObjTableLock;
|
|
|
|
extern SeqNum ISNMonotonicPortion;
|
|
extern int ISNCredits;
|
|
extern int ISNMaxCredits;
|
|
extern uint GetDeltaTime();
|
|
|
|
|
|
uint CurrentTCBs = 0;
|
|
uint FreeTCBs = 0;
|
|
|
|
uint MaxTCBs = 0xffffffff;
|
|
|
|
#define MAX_FREE_TCBS 1000
|
|
|
|
#define NUM_DEADMAN_TICKS MS_TO_TICKS(1000)
|
|
|
|
uint MaxFreeTCBs = MAX_FREE_TCBS;
|
|
uint DeadmanTicks;
|
|
|
|
KTIMER TCBTimer;
|
|
KDPC TCBTimeoutDpc;
|
|
|
|
//
|
|
// All of the init code can be discarded.
|
|
//
|
|
#ifdef ALLOC_PRAGMA
|
|
|
|
int InitTCB(void);
|
|
|
|
#pragma alloc_text(INIT, InitTCB)
|
|
|
|
#endif // ALLOC_PRAGMA
|
|
|
|
|
|
//* ReadNextTCB - Read the next TCB in the table.
|
|
//
|
|
// Called to read the next TCB in the table. The needed information
|
|
// is derived from the incoming context, which is assumed to be valid.
|
|
// We'll copy the information, and then update the context value with
|
|
// the next TCB to be read.
|
|
//
|
|
uint // Returns: TRUE if more data is available to be read, FALSE is not.
|
|
ReadNextTCB(
|
|
void *Context, // Pointer to a TCPConnContext.
|
|
void *Buffer) // Pointer to a TCPConnTableEntry structure.
|
|
{
|
|
TCPConnContext *TCContext = (TCPConnContext *)Context;
|
|
TCP6ConnTableEntry *TCEntry = (TCP6ConnTableEntry *)Buffer;
|
|
KIRQL OldIrql;
|
|
TCB *CurrentTCB;
|
|
uint i;
|
|
|
|
CurrentTCB = TCContext->tcc_tcb;
|
|
CHECK_STRUCT(CurrentTCB, tcb);
|
|
|
|
KeAcquireSpinLock(&CurrentTCB->tcb_lock, &OldIrql);
|
|
if (CLOSING(CurrentTCB))
|
|
TCEntry->tct_state = TCP_CONN_CLOSED;
|
|
else
|
|
TCEntry->tct_state = (uint)CurrentTCB->tcb_state + TCB_STATE_DELTA;
|
|
TCEntry->tct_localaddr = CurrentTCB->tcb_saddr;
|
|
TCEntry->tct_localscopeid = CurrentTCB->tcb_sscope_id;
|
|
TCEntry->tct_localport = CurrentTCB->tcb_sport;
|
|
TCEntry->tct_remoteaddr = CurrentTCB->tcb_daddr;
|
|
TCEntry->tct_remotescopeid = CurrentTCB->tcb_dscope_id;
|
|
TCEntry->tct_remoteport = CurrentTCB->tcb_dport;
|
|
TCEntry->tct_owningpid = (CurrentTCB->tcb_conn) ?
|
|
CurrentTCB->tcb_conn->tc_owningpid : 0;
|
|
KeReleaseSpinLock(&CurrentTCB->tcb_lock, OldIrql);
|
|
|
|
// We've filled it in. Now update the context.
|
|
if (CurrentTCB->tcb_next != NULL) {
|
|
TCContext->tcc_tcb = CurrentTCB->tcb_next;
|
|
return TRUE;
|
|
} else {
|
|
// NextTCB is NULL. Loop through the TCBTable looking for a new one.
|
|
i = TCContext->tcc_index + 1;
|
|
while (i < TcbTableSize) {
|
|
if (TCBTable[i] != NULL) {
|
|
TCContext->tcc_tcb = TCBTable[i];
|
|
TCContext->tcc_index = i;
|
|
return TRUE;
|
|
break;
|
|
} else
|
|
i++;
|
|
}
|
|
|
|
TCContext->tcc_index = 0;
|
|
TCContext->tcc_tcb = NULL;
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
|
|
//* ValidateTCBContext - Validate the context for reading a TCB table.
|
|
//
|
|
// Called to start reading the TCB table sequentially. We take in
|
|
// a context, and if the values are 0 we return information about the
|
|
// first TCB in the table. Otherwise we make sure that the context value
|
|
// is valid, and if it is we return TRUE.
|
|
// We assume the caller holds the TCB table lock.
|
|
//
|
|
// Upon return, *Valid is set to true if the context is valid.
|
|
//
|
|
uint // Returns: TRUE if data in table, FALSE if not.
|
|
ValidateTCBContext(
|
|
void *Context, // Pointer to a TCPConnContext.
|
|
uint *Valid) // Where to return infoformation about context being valid.
|
|
{
|
|
TCPConnContext *TCContext = (TCPConnContext *)Context;
|
|
uint i;
|
|
TCB *TargetTCB;
|
|
TCB *CurrentTCB;
|
|
|
|
i = TCContext->tcc_index;
|
|
TargetTCB = TCContext->tcc_tcb;
|
|
|
|
//
|
|
// If the context values are 0 and NULL, we're starting from the beginning.
|
|
//
|
|
if (i == 0 && TargetTCB == NULL) {
|
|
*Valid = TRUE;
|
|
do {
|
|
if ((CurrentTCB = TCBTable[i]) != NULL) {
|
|
CHECK_STRUCT(CurrentTCB, tcb);
|
|
break;
|
|
}
|
|
i++;
|
|
} while (i < TcbTableSize);
|
|
|
|
if (CurrentTCB != NULL) {
|
|
TCContext->tcc_index = i;
|
|
TCContext->tcc_tcb = CurrentTCB;
|
|
return TRUE;
|
|
} else
|
|
return FALSE;
|
|
|
|
} else {
|
|
//
|
|
// We've been given a context. We just need to make sure that it's
|
|
// valid.
|
|
//
|
|
if (i < TcbTableSize) {
|
|
CurrentTCB = TCBTable[i];
|
|
while (CurrentTCB != NULL) {
|
|
if (CurrentTCB == TargetTCB) {
|
|
*Valid = TRUE;
|
|
return TRUE;
|
|
break;
|
|
} else {
|
|
CurrentTCB = CurrentTCB->tcb_next;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
// If we get here, we didn't find the matching TCB.
|
|
*Valid = FALSE;
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
|
|
//* FindNextTCB - Find the next TCB in a particular chain.
|
|
//
|
|
// This routine is used to find the 'next' TCB in a chain. Since we keep
|
|
// the chain in ascending order, we look for a TCB which is greater than
|
|
// the input TCB. When we find one, we return it.
|
|
//
|
|
// This routine is mostly used when someone is walking the table and needs
|
|
// to free the various locks to perform some action.
|
|
//
|
|
TCB * // Returns: Pointer to the next TCB, or NULL.
|
|
FindNextTCB(
|
|
uint Index, // Index into TCBTable.
|
|
TCB *Current) // Current TCB - we find the one after this one.
|
|
{
|
|
TCB *Next;
|
|
|
|
ASSERT(Index < TcbTableSize);
|
|
|
|
Next = TCBTable[Index];
|
|
|
|
while (Next != NULL && (Next <= Current))
|
|
Next = Next->tcb_next;
|
|
|
|
return Next;
|
|
}
|
|
|
|
|
|
//* ResetSendNext - Set the sendnext value of a TCB.
|
|
//
|
|
// Called to set the send next value of a TCB. We do that, and adjust all
|
|
// pointers to the appropriate places. We assume the caller holds the lock
|
|
// on the TCB.
|
|
//
|
|
void // Returns: Nothing.
|
|
ResetSendNext(
|
|
TCB *SeqTCB, // TCB to be updated.
|
|
SeqNum NewSeq) // Sequence number to set.
|
|
{
|
|
TCPSendReq *SendReq;
|
|
uint AmtForward;
|
|
Queue *CurQ;
|
|
PNDIS_BUFFER Buffer;
|
|
uint Offset;
|
|
|
|
CHECK_STRUCT(SeqTCB, tcb);
|
|
ASSERT(SEQ_GTE(NewSeq, SeqTCB->tcb_senduna));
|
|
|
|
//
|
|
// The new seq must be less than send max, or NewSeq, senduna, sendnext,
|
|
// and sendmax must all be equal (the latter case happens when we're
|
|
// called exiting TIME_WAIT, or possibly when we're retransmitting
|
|
// during a flow controlled situation).
|
|
//
|
|
ASSERT(SEQ_LT(NewSeq, SeqTCB->tcb_sendmax) ||
|
|
(SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendnext) &&
|
|
SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendmax) &&
|
|
SEQ_EQ(SeqTCB->tcb_senduna, NewSeq)));
|
|
|
|
AmtForward = NewSeq - SeqTCB->tcb_senduna;
|
|
|
|
if ((AmtForward == 1) && (SeqTCB->tcb_flags & FIN_SENT) &&
|
|
!((SeqTCB->tcb_sendnext - SeqTCB->tcb_senduna) > 1) &&
|
|
(SEQ_EQ(SeqTCB->tcb_sendnext,SeqTCB->tcb_sendmax))) {
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_RARE,
|
|
"tcpip6: trying to set sendnext for FIN_SENT\n"));
|
|
SeqTCB->tcb_sendnext = NewSeq;
|
|
SeqTCB->tcb_flags &= ~FIN_OUTSTANDING;
|
|
return;
|
|
}
|
|
if((SeqTCB->tcb_flags & FIN_SENT) &&
|
|
(SEQ_EQ(SeqTCB->tcb_sendnext,SeqTCB->tcb_sendmax)) &&
|
|
((SeqTCB->tcb_sendnext - NewSeq) == 1) ){
|
|
|
|
//
|
|
// There is only FIN that is left beyond sendnext.
|
|
//
|
|
SeqTCB->tcb_sendnext = NewSeq;
|
|
SeqTCB->tcb_flags &= ~FIN_OUTSTANDING;
|
|
return;
|
|
}
|
|
|
|
|
|
SeqTCB->tcb_sendnext = NewSeq;
|
|
|
|
//
|
|
// If we're backing off send next, turn off the FIN_OUTSTANDING flag to
|
|
// maintain a consistent state.
|
|
//
|
|
if (!SEQ_EQ(NewSeq, SeqTCB->tcb_sendmax))
|
|
SeqTCB->tcb_flags &= ~FIN_OUTSTANDING;
|
|
|
|
if (SYNC_STATE(SeqTCB->tcb_state) && SeqTCB->tcb_state != TCB_TIME_WAIT) {
|
|
//
|
|
// In these states we need to update the send queue.
|
|
//
|
|
|
|
if (!EMPTYQ(&SeqTCB->tcb_sendq)) {
|
|
CurQ = QHEAD(&SeqTCB->tcb_sendq);
|
|
|
|
SendReq = (TCPSendReq *)CONTAINING_RECORD(CurQ, TCPReq, tr_q);
|
|
|
|
//
|
|
// SendReq points to the first send request on the send queue.
|
|
// Move forward AmtForward bytes on the send queue, and set the
|
|
// TCB pointers to the resultant SendReq, buffer, offset, size.
|
|
//
|
|
while (AmtForward) {
|
|
|
|
CHECK_STRUCT(SendReq, tsr);
|
|
|
|
if (AmtForward >= SendReq->tsr_unasize) {
|
|
//
|
|
// We're going to move completely past this one. Subtract
|
|
// his size from AmtForward and get the next one.
|
|
//
|
|
AmtForward -= SendReq->tsr_unasize;
|
|
CurQ = QNEXT(CurQ);
|
|
ASSERT(CurQ != QEND(&SeqTCB->tcb_sendq));
|
|
SendReq = (TCPSendReq *)CONTAINING_RECORD(CurQ, TCPReq,
|
|
tr_q);
|
|
} else {
|
|
//
|
|
// We're pointing at the proper send req now. Break out
|
|
// of this loop and save the information. Further down
|
|
// we'll need to walk down the buffer chain to find
|
|
// the proper buffer and offset.
|
|
//
|
|
break;
|
|
}
|
|
}
|
|
|
|
//
|
|
// We're pointing at the proper send req now. We need to go down
|
|
// the buffer chain here to find the proper buffer and offset.
|
|
//
|
|
SeqTCB->tcb_cursend = SendReq;
|
|
SeqTCB->tcb_sendsize = SendReq->tsr_unasize - AmtForward;
|
|
Buffer = SendReq->tsr_buffer;
|
|
Offset = SendReq->tsr_offset;
|
|
|
|
while (AmtForward) {
|
|
// Walk the buffer chain.
|
|
uint Length;
|
|
|
|
//
|
|
// We'll need the length of this buffer. Use the portable
|
|
// macro to get it. We have to adjust the length by the offset
|
|
// into it, also.
|
|
//
|
|
ASSERT((Offset < NdisBufferLength(Buffer)) ||
|
|
((Offset == 0) && (NdisBufferLength(Buffer) == 0)));
|
|
|
|
Length = NdisBufferLength(Buffer) - Offset;
|
|
|
|
if (AmtForward >= Length) {
|
|
//
|
|
// We're moving past this one. Skip over him, and 0 the
|
|
// Offset we're keeping.
|
|
//
|
|
AmtForward -= Length;
|
|
Offset = 0;
|
|
Buffer = NDIS_BUFFER_LINKAGE(Buffer);
|
|
ASSERT(Buffer != NULL);
|
|
} else
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Save the buffer we found, and the offset into that buffer.
|
|
//
|
|
SeqTCB->tcb_sendbuf = Buffer;
|
|
SeqTCB->tcb_sendofs = Offset + AmtForward;
|
|
|
|
} else {
|
|
ASSERT(SeqTCB->tcb_cursend == NULL);
|
|
ASSERT(AmtForward == 0);
|
|
}
|
|
}
|
|
|
|
CheckTCBSends(SeqTCB);
|
|
}
|
|
|
|
|
|
//* TCPAbortAndIndicateDisconnect
|
|
//
|
|
// Abortively closes a TCB and issues a disconnect indication up the the
|
|
// transport user. This function is used to support cancellation of
|
|
// TDI send and receive requests.
|
|
//
|
|
void // Returns: Nothing.
|
|
TCPAbortAndIndicateDisconnect(
|
|
CONNECTION_CONTEXT ConnectionContext // Connection ID to find a TCB for.
|
|
)
|
|
{
|
|
TCB *AbortTCB;
|
|
KIRQL Irql0, Irql1; // One per lock nesting level.
|
|
TCPConn *Conn;
|
|
|
|
Conn = GetConnFromConnID(PtrToUlong(ConnectionContext), &Irql0);
|
|
|
|
if (Conn != NULL) {
|
|
CHECK_STRUCT(Conn, tc);
|
|
|
|
AbortTCB = Conn->tc_tcb;
|
|
|
|
if (AbortTCB != NULL) {
|
|
//
|
|
// If it's CLOSING or CLOSED, skip it.
|
|
//
|
|
if ((AbortTCB->tcb_state != TCB_CLOSED) && !CLOSING(AbortTCB)) {
|
|
CHECK_STRUCT(AbortTCB, tcb);
|
|
KeAcquireSpinLock(&AbortTCB->tcb_lock, &Irql1);
|
|
KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql1);
|
|
|
|
if (AbortTCB->tcb_state == TCB_CLOSED || CLOSING(AbortTCB)) {
|
|
KeReleaseSpinLock(&AbortTCB->tcb_lock, Irql0);
|
|
return;
|
|
}
|
|
|
|
AbortTCB->tcb_refcnt++;
|
|
AbortTCB->tcb_flags |= NEED_RST; // send a reset if connected
|
|
TryToCloseTCB(AbortTCB, TCB_CLOSE_ABORTED, Irql0);
|
|
|
|
RemoveTCBFromConn(AbortTCB);
|
|
|
|
IF_TCPDBG(TCP_DEBUG_IRP) {
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
|
|
"TCPAbortAndIndicateDisconnect, indicating discon\n"));
|
|
}
|
|
|
|
NotifyOfDisc(AbortTCB, TDI_CONNECTION_ABORTED);
|
|
|
|
KeAcquireSpinLock(&AbortTCB->tcb_lock, &Irql0);
|
|
DerefTCB(AbortTCB, Irql0);
|
|
|
|
// TCB lock freed by DerefTCB.
|
|
|
|
return;
|
|
} else
|
|
KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql0);
|
|
} else
|
|
KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql0);
|
|
}
|
|
}
|
|
|
|
|
|
//* TCBTimeout - Do timeout events on TCBs.
|
|
//
|
|
// Called every MS_PER_TICKS milliseconds to do timeout processing on TCBs.
|
|
// We run throught the TCB table, decrementing timers. If one goes to zero
|
|
// we look at its state to decide what to do.
|
|
//
|
|
void // Returns: Nothing.
|
|
TCBTimeout(
|
|
PKDPC MyDpcObject, // The DPC object describing this routine.
|
|
void *Context, // The argument we asked to be called with.
|
|
void *Unused1,
|
|
void *Unused2)
|
|
{
|
|
uint i;
|
|
TCB *CurrentTCB;
|
|
uint Delayed = FALSE;
|
|
uint CallRcvComplete;
|
|
int Delta;
|
|
|
|
UNREFERENCED_PARAMETER(Context);
|
|
UNREFERENCED_PARAMETER(Unused1);
|
|
UNREFERENCED_PARAMETER(Unused2);
|
|
|
|
//
|
|
// Update our free running counter.
|
|
//
|
|
TCPTime++;
|
|
|
|
ExInterlockedAddUlong(&TCBWalkCount, 1, &TCBTableLock);
|
|
|
|
//
|
|
// Set credits so that some more connections can increment the
|
|
// Initial Sequence Number, during the next 100 ms.
|
|
//
|
|
InterlockedExchange(&ISNCredits, ISNMaxCredits);
|
|
|
|
Delta = GetDeltaTime();
|
|
|
|
//
|
|
// The increment made is (256)*(Time in milliseconds). This is really close
|
|
// to 25000 increment made originally every 100 ms.
|
|
//
|
|
if (Delta > 0) {
|
|
Delta *= 0x100;
|
|
InterlockedExchangeAdd(&ISNMonotonicPortion, Delta);
|
|
}
|
|
|
|
//
|
|
// Loop through each bucket in the table, going down the chain of
|
|
// TCBs on the bucket.
|
|
//
|
|
for (i = 0; i < TcbTableSize; i++) {
|
|
TCB *TempTCB;
|
|
uint maxRexmitCnt;
|
|
|
|
CurrentTCB = TCBTable[i];
|
|
|
|
while (CurrentTCB != NULL) {
|
|
CHECK_STRUCT(CurrentTCB, tcb);
|
|
KeAcquireSpinLockAtDpcLevel(&CurrentTCB->tcb_lock);
|
|
|
|
//
|
|
// If it's CLOSING or CLOSED, skip it.
|
|
//
|
|
if (CurrentTCB->tcb_state == TCB_CLOSED || CLOSING(CurrentTCB)) {
|
|
TempTCB = CurrentTCB->tcb_next;
|
|
KeReleaseSpinLockFromDpcLevel(&CurrentTCB->tcb_lock);
|
|
CurrentTCB = TempTCB;
|
|
continue;
|
|
}
|
|
|
|
CheckTCBSends(CurrentTCB);
|
|
CheckTCBRcv(CurrentTCB);
|
|
|
|
//
|
|
// First check the rexmit timer.
|
|
//
|
|
if (TCB_TIMER_RUNNING(CurrentTCB->tcb_rexmittimer)) {
|
|
//
|
|
// The timer is running.
|
|
//
|
|
if (--(CurrentTCB->tcb_rexmittimer) == 0) {
|
|
//
|
|
// And it's fired. Figure out what to do now.
|
|
//
|
|
|
|
if (CurrentTCB->tcb_state == TCB_SYN_SENT) {
|
|
maxRexmitCnt = MaxConnectRexmitCount;
|
|
} else {
|
|
maxRexmitCnt = MaxDataRexmitCount;
|
|
}
|
|
|
|
//
|
|
// If we've run out of retransmits or we're in FIN_WAIT2,
|
|
// time out.
|
|
//
|
|
CurrentTCB->tcb_rexmitcnt++;
|
|
if (CurrentTCB->tcb_rexmitcnt > maxRexmitCnt) {
|
|
|
|
ASSERT(CurrentTCB->tcb_state > TCB_LISTEN);
|
|
|
|
//
|
|
// This connection has timed out. Abort it. First
|
|
// reference him, then mark as closed, notify the
|
|
// user, and finally dereference and close him.
|
|
//
|
|
TimeoutTCB:
|
|
CurrentTCB->tcb_refcnt++;
|
|
TryToCloseTCB(CurrentTCB, TCB_CLOSE_TIMEOUT,
|
|
DISPATCH_LEVEL);
|
|
|
|
RemoveTCBFromConn(CurrentTCB);
|
|
NotifyOfDisc(CurrentTCB, TDI_TIMED_OUT);
|
|
|
|
KeAcquireSpinLockAtDpcLevel(&CurrentTCB->tcb_lock);
|
|
DerefTCB(CurrentTCB, DISPATCH_LEVEL);
|
|
|
|
CurrentTCB = FindNextTCB(i, CurrentTCB);
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// Stop round trip time measurement.
|
|
//
|
|
CurrentTCB->tcb_rtt = 0;
|
|
|
|
//
|
|
// Figure out what our new retransmit timeout should be.
|
|
// We double it each time we get a retransmit, and reset it
|
|
// back when we get an ack for new data.
|
|
//
|
|
CurrentTCB->tcb_rexmit = MIN(CurrentTCB->tcb_rexmit << 1,
|
|
MAX_REXMIT_TO);
|
|
|
|
//
|
|
// Reset the sequence number, and reset the congestion
|
|
// window.
|
|
//
|
|
ResetSendNext(CurrentTCB, CurrentTCB->tcb_senduna);
|
|
|
|
if (!(CurrentTCB->tcb_flags & FLOW_CNTLD)) {
|
|
//
|
|
// Don't let the slow start threshold go below 2
|
|
// segments.
|
|
//
|
|
CurrentTCB->tcb_ssthresh =
|
|
MAX(MIN(CurrentTCB->tcb_cwin,
|
|
CurrentTCB->tcb_sendwin) / 2,
|
|
(uint) CurrentTCB->tcb_mss * 2);
|
|
CurrentTCB->tcb_cwin = CurrentTCB->tcb_mss;
|
|
} else {
|
|
//
|
|
// We're probing, and the probe timer has fired. We
|
|
// need to set the FORCE_OUTPUT bit here.
|
|
//
|
|
CurrentTCB->tcb_flags |= FORCE_OUTPUT;
|
|
}
|
|
|
|
//
|
|
// See if we need to probe for a PMTU black hole.
|
|
//
|
|
if (PMTUBHDetect &&
|
|
CurrentTCB->tcb_rexmitcnt == ((maxRexmitCnt+1)/2)) {
|
|
//
|
|
// We may need to probe for a black hole. If we're
|
|
// doing MTU discovery on this connection and we
|
|
// are retransmitting more than a minimum segment
|
|
// size, or we are probing for a PMTU BH already, turn
|
|
// off the DF flag and bump the probe count. If the
|
|
// probe count gets too big we'll assume it's not
|
|
// a PMTU black hole, and we'll try to switch the
|
|
// router.
|
|
//
|
|
if ((CurrentTCB->tcb_flags & PMTU_BH_PROBE) ||
|
|
(CurrentTCB->tcb_sendmax - CurrentTCB->tcb_senduna
|
|
> 8)) {
|
|
//
|
|
// May need to probe. If we haven't exceeded our
|
|
// probe count, do so, otherwise restore those
|
|
// values.
|
|
//
|
|
if (CurrentTCB->tcb_bhprobecnt++ < 2) {
|
|
//
|
|
// We're going to probe. Turn on the flag,
|
|
// drop the MSS, and turn off the don't
|
|
// fragment bit.
|
|
//
|
|
if (!(CurrentTCB->tcb_flags & PMTU_BH_PROBE)) {
|
|
CurrentTCB->tcb_flags |= PMTU_BH_PROBE;
|
|
CurrentTCB->tcb_slowcount++;
|
|
CurrentTCB->tcb_fastchk |= TCP_FLAG_SLOW;
|
|
//
|
|
// Drop the MSS to the minimum.
|
|
//
|
|
CurrentTCB->tcb_mss =
|
|
MIN(DEFAULT_MSS,
|
|
CurrentTCB->tcb_remmss);
|
|
|
|
ASSERT(CurrentTCB->tcb_mss > 0);
|
|
|
|
CurrentTCB->tcb_cwin = CurrentTCB->tcb_mss;
|
|
}
|
|
|
|
//
|
|
// Drop the rexmit count so we come here again,
|
|
// and don't retrigger DeadGWDetect.
|
|
//
|
|
CurrentTCB->tcb_rexmitcnt--;
|
|
} else {
|
|
//
|
|
// Too many probes. Stop probing, and allow
|
|
// fallover to the next gateway.
|
|
//
|
|
// Currently this code won't do BH probing on
|
|
// the 2nd gateway. The MSS will stay at the
|
|
// minimum size. This might be a little
|
|
// suboptimal, but it's easy to implement for
|
|
// the Sept. 95 service pack and will keep
|
|
// connections alive if possible.
|
|
//
|
|
// In the future we should investigate doing
|
|
// dead g/w detect on a per-connection basis,
|
|
// and then doing PMTU probing for each
|
|
// connection.
|
|
//
|
|
if (CurrentTCB->tcb_flags & PMTU_BH_PROBE) {
|
|
CurrentTCB->tcb_flags &= ~PMTU_BH_PROBE;
|
|
if (--(CurrentTCB->tcb_slowcount) == 0)
|
|
CurrentTCB->tcb_fastchk &=
|
|
~TCP_FLAG_SLOW;
|
|
}
|
|
CurrentTCB->tcb_bhprobecnt = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Since we're retransmitting, our first-hop router
|
|
// may be down. Tell IP we're suspicious if this
|
|
// is the first retransmit.
|
|
//
|
|
if (CurrentTCB->tcb_rexmitcnt == 1 &&
|
|
CurrentTCB->tcb_rce != NULL) {
|
|
ForwardReachabilityInDoubt(CurrentTCB->tcb_rce);
|
|
}
|
|
|
|
//
|
|
// Now handle the various cases.
|
|
//
|
|
switch (CurrentTCB->tcb_state) {
|
|
|
|
case TCB_SYN_SENT:
|
|
case TCB_SYN_RCVD:
|
|
//
|
|
// In SYN-SENT or SYN-RCVD we'll need to retransmit
|
|
// the SYN.
|
|
//
|
|
SendSYN(CurrentTCB, DISPATCH_LEVEL);
|
|
CurrentTCB = FindNextTCB(i, CurrentTCB);
|
|
continue;
|
|
|
|
case TCB_FIN_WAIT1:
|
|
case TCB_CLOSING:
|
|
case TCB_LAST_ACK:
|
|
//
|
|
// The call to ResetSendNext (above) will have
|
|
// turned off the FIN_OUTSTANDING flag.
|
|
//
|
|
CurrentTCB->tcb_flags |= FIN_NEEDED;
|
|
|
|
case TCB_CLOSE_WAIT:
|
|
case TCB_ESTAB:
|
|
//
|
|
// In this state we have data to retransmit, unless
|
|
// the window is zero (in which case we need to
|
|
// probe), or we're just sending a FIN.
|
|
//
|
|
CheckTCBSends(CurrentTCB);
|
|
|
|
Delayed = TRUE;
|
|
DelayAction(CurrentTCB, NEED_OUTPUT);
|
|
break;
|
|
|
|
case TCB_TIME_WAIT:
|
|
//
|
|
// If it's fired in TIME-WAIT, we're all done and
|
|
// can clean up. We'll call TryToCloseTCB even
|
|
// though he's already sort of closed. TryToCloseTCB
|
|
// will figure this out and do the right thing.
|
|
//
|
|
TryToCloseTCB(CurrentTCB, TCB_CLOSE_SUCCESS,
|
|
DISPATCH_LEVEL);
|
|
CurrentTCB = FindNextTCB(i, CurrentTCB);
|
|
continue;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Now check the SWS deadlock timer..
|
|
//
|
|
if (TCB_TIMER_RUNNING(CurrentTCB->tcb_swstimer)) {
|
|
//
|
|
// The timer is running.
|
|
//
|
|
if (--(CurrentTCB->tcb_swstimer) == 0) {
|
|
//
|
|
// And it's fired. Force output now.
|
|
//
|
|
CurrentTCB->tcb_flags |= FORCE_OUTPUT;
|
|
Delayed = TRUE;
|
|
DelayAction(CurrentTCB, NEED_OUTPUT);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Check the push data timer.
|
|
//
|
|
if (TCB_TIMER_RUNNING(CurrentTCB->tcb_pushtimer)) {
|
|
//
|
|
// The timer is running. Decrement it.
|
|
//
|
|
if (--(CurrentTCB->tcb_pushtimer) == 0) {
|
|
//
|
|
// It's fired.
|
|
//
|
|
PushData(CurrentTCB);
|
|
Delayed = TRUE;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Check the delayed ack timer.
|
|
//
|
|
if (TCB_TIMER_RUNNING(CurrentTCB->tcb_delacktimer)) {
|
|
//
|
|
// The timer is running.
|
|
//
|
|
if (--(CurrentTCB->tcb_delacktimer) == 0) {
|
|
//
|
|
// And it's fired. Set up to send an ACK.
|
|
//
|
|
Delayed = TRUE;
|
|
DelayAction(CurrentTCB, NEED_ACK);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Finally check the keepalive timer.
|
|
//
|
|
if (CurrentTCB->tcb_state == TCB_ESTAB) {
|
|
if ((CurrentTCB->tcb_flags & KEEPALIVE) &&
|
|
(CurrentTCB->tcb_conn != NULL)) {
|
|
uint Delta;
|
|
|
|
Delta = TCPTime - CurrentTCB->tcb_alive;
|
|
if (Delta > CurrentTCB->tcb_conn->tc_tcbkatime) {
|
|
Delta -= CurrentTCB->tcb_conn->tc_tcbkatime;
|
|
if (Delta > (CurrentTCB->tcb_kacount * CurrentTCB->tcb_conn->tc_tcbkainterval)) {
|
|
if (CurrentTCB->tcb_kacount < MaxDataRexmitCount) {
|
|
SendKA(CurrentTCB, DISPATCH_LEVEL);
|
|
CurrentTCB = FindNextTCB(i, CurrentTCB);
|
|
continue;
|
|
} else
|
|
goto TimeoutTCB;
|
|
}
|
|
} else
|
|
CurrentTCB->tcb_kacount = 0;
|
|
}
|
|
}
|
|
|
|
//
|
|
// If this is an active open connection in SYN-SENT or SYN-RCVD,
|
|
// or we have a FIN pending, check the connect timer.
|
|
//
|
|
if (CurrentTCB->tcb_flags &
|
|
(ACTIVE_OPEN | FIN_NEEDED | FIN_SENT)) {
|
|
TCPConnReq *ConnReq = CurrentTCB->tcb_connreq;
|
|
|
|
ASSERT(ConnReq != NULL);
|
|
if (TCB_TIMER_RUNNING(ConnReq->tcr_timeout)) {
|
|
// Timer is running.
|
|
if (--(ConnReq->tcr_timeout) == 0) {
|
|
// The connection timer has timed out.
|
|
TryToCloseTCB(CurrentTCB, TCB_CLOSE_TIMEOUT,
|
|
DISPATCH_LEVEL);
|
|
CurrentTCB = FindNextTCB(i, CurrentTCB);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Timer isn't running, or didn't fire.
|
|
//
|
|
TempTCB = CurrentTCB->tcb_next;
|
|
KeReleaseSpinLockFromDpcLevel(&CurrentTCB->tcb_lock);
|
|
CurrentTCB = TempTCB;
|
|
}
|
|
}
|
|
|
|
//
|
|
// See if we need to call receive complete as part of deadman processing.
|
|
// We do this now because we want to restart the timer before calling
|
|
// receive complete, in case that takes a while. If we make this check
|
|
// while the timer is running we'd have to lock, so we'll check and save
|
|
// the result now before we start the timer.
|
|
//
|
|
if (DeadmanTicks == TCPTime) {
|
|
CallRcvComplete = TRUE;
|
|
DeadmanTicks += NUM_DEADMAN_TICKS;
|
|
} else
|
|
CallRcvComplete = FALSE;
|
|
|
|
//
|
|
// Now check the pending free list. If it's not null, walk down the
|
|
// list and decrement the walk count. If the count goes below 2, pull it
|
|
// from the list. If the count goes to 0, free the TCB. If the count is
|
|
// at 1 it'll be freed by whoever called RemoveTCB.
|
|
//
|
|
KeAcquireSpinLockAtDpcLevel(&TCBTableLock);
|
|
if (PendingFreeList != NULL) {
|
|
TCB *PrevTCB;
|
|
|
|
PrevTCB = CONTAINING_RECORD(&PendingFreeList, TCB, tcb_delayq.q_next);
|
|
|
|
do {
|
|
CurrentTCB = (TCB *)PrevTCB->tcb_delayq.q_next;
|
|
|
|
CHECK_STRUCT(CurrentTCB, tcb);
|
|
|
|
CurrentTCB->tcb_walkcount--;
|
|
if (CurrentTCB->tcb_walkcount <= 1) {
|
|
*(TCB **)&PrevTCB->tcb_delayq.q_next =
|
|
(TCB *)CurrentTCB->tcb_delayq.q_next;
|
|
|
|
if (CurrentTCB->tcb_walkcount == 0) {
|
|
FreeTCB(CurrentTCB);
|
|
}
|
|
} else {
|
|
PrevTCB = CurrentTCB;
|
|
}
|
|
} while (PrevTCB->tcb_delayq.q_next != NULL);
|
|
}
|
|
|
|
TCBWalkCount--;
|
|
KeReleaseSpinLockFromDpcLevel(&TCBTableLock);
|
|
|
|
//
|
|
// Do AddrCheckTable cleanup.
|
|
//
|
|
if (AddrCheckTable) {
|
|
|
|
TCPAddrCheckElement *Temp;
|
|
|
|
KeAcquireSpinLockAtDpcLevel(&AddrObjTableLock);
|
|
|
|
for (Temp = AddrCheckTable;Temp < AddrCheckTable + NTWMaxConnectCount;
|
|
Temp++) {
|
|
if (Temp->TickCount > 0) {
|
|
if ((--(Temp->TickCount)) == 0) {
|
|
Temp->SourceAddress = UnspecifiedAddr;
|
|
}
|
|
}
|
|
}
|
|
|
|
KeReleaseSpinLockFromDpcLevel(&AddrObjTableLock);
|
|
}
|
|
|
|
if (Delayed)
|
|
ProcessTCBDelayQ();
|
|
|
|
if (CallRcvComplete)
|
|
TCPRcvComplete();
|
|
}
|
|
|
|
|
|
#if 0 // We update PMTU lazily to avoid exactly this.
//* SetTCBMTU - Set TCB MTU values.
//
//  A function called by TCBWalk to set the MTU values of all TCBs using
//  a particular path.  A TCB uses the path when both its destination and
//  its source address match the ones given.
//
uint  // Returns: TRUE.
SetTCBMTU(
    TCB *CheckTCB,  // TCB to be checked.
    void *DestPtr,  // Destination address.
    void *SrcPtr,   // Source address.
    void *MTUPtr)   // New MTU.
{
    IPv6Addr *PathDest = (IPv6Addr *)DestPtr;
    IPv6Addr *PathSrc = (IPv6Addr *)SrcPtr;
    KIRQL SavedIrql;

    CHECK_STRUCT(CheckTCB, tcb);

    KeAcquireSpinLock(&CheckTCB->tcb_lock, &SavedIrql);

    if (IP6_ADDR_EQUAL(&CheckTCB->tcb_daddr, PathDest) &&
        IP6_ADDR_EQUAL(&CheckTCB->tcb_saddr, PathSrc)) {
        uint NewMTU = *(uint *)MTUPtr;

        // Never exceed the MSS the remote end advertised.
        CheckTCB->tcb_mss = (ushort)MIN(NewMTU, (uint)CheckTCB->tcb_remmss);

        ASSERT(CheckTCB->tcb_mss > 0);

        //
        // Reset the Congestion Window if necessary.
        //
        if (CheckTCB->tcb_cwin < CheckTCB->tcb_mss) {
            CheckTCB->tcb_cwin = CheckTCB->tcb_mss;

            //
            // Make sure the slow start threshold is at least 2 segments.
            //
            if (CheckTCB->tcb_ssthresh < ((uint) CheckTCB->tcb_mss*2)) {
                CheckTCB->tcb_ssthresh = CheckTCB->tcb_mss * 2;
            }
        }
    }

    KeReleaseSpinLock(&CheckTCB->tcb_lock, SavedIrql);

    // TRUE tells TCBWalk to leave the TCB alone.
    return TRUE;
}
#endif
|
|
|
|
//* DeleteTCBWithSrc - Delete tcbs with a particular src address.
|
|
//
|
|
// A function called by TCBWalk to delete all TCBs with a particular source
|
|
// address.
|
|
//
|
|
uint // Returns: FALSE if CheckTCB is to be deleted, TRUE otherwise.
|
|
DeleteTCBWithSrc(
|
|
TCB *CheckTCB, // TCB to be checked.
|
|
void *AddrPtr, // Pointer to address.
|
|
void *Unused1, // Go figure.
|
|
void *Unused3) // What happened to Unused2?
|
|
{
|
|
IPv6Addr *Addr = (IPv6Addr *)AddrPtr;
|
|
|
|
CHECK_STRUCT(CheckTCB, tcb);
|
|
|
|
if (IP6_ADDR_EQUAL(&CheckTCB->tcb_saddr, Addr))
|
|
return FALSE;
|
|
else
|
|
return TRUE;
|
|
}
|
|
|
|
//* TCBWalk - Walk the TCBs in the table, and call a function for each of them.
|
|
//
|
|
// Called when we need to repetively do something to each TCB in the table.
|
|
// We call the specified function with a pointer to the TCB and the input
|
|
// context for each TCB in the table. If the function returns FALSE, we
|
|
// delete the TCB.
|
|
//
|
|
void // Returns: Nothing.
|
|
TCBWalk(
|
|
uint (*CallRtn)(struct TCB *, void *, void *, void *), // Routine to call.
|
|
void *Context1, // Context to pass to CallRtn.
|
|
void *Context2, // Second context to pass to call routine.
|
|
void *Context3) // Third context to pass to call routine.
|
|
{
|
|
uint i;
|
|
TCB *CurTCB;
|
|
KIRQL Irql0, Irql1;
|
|
|
|
//
|
|
// Loop through each bucket in the table, going down the chain of
|
|
// TCBs on the bucket. For each one call CallRtn.
|
|
//
|
|
KeAcquireSpinLock(&TCBTableLock, &Irql0);
|
|
|
|
for (i = 0; i < TcbTableSize; i++) {
|
|
|
|
CurTCB = TCBTable[i];
|
|
|
|
//
|
|
// Walk down the chain on this bucket.
|
|
//
|
|
while (CurTCB != NULL) {
|
|
if (!(*CallRtn)(CurTCB, Context1, Context2, Context3)) {
|
|
//
|
|
// Call failed on this one.
|
|
// Notify the client and close the TCB.
|
|
//
|
|
KeAcquireSpinLock(&CurTCB->tcb_lock, &Irql1);
|
|
if (!CLOSING(CurTCB)) {
|
|
CurTCB->tcb_refcnt++;
|
|
KeReleaseSpinLock(&TCBTableLock, Irql1);
|
|
TryToCloseTCB(CurTCB, TCB_CLOSE_ABORTED, Irql0);
|
|
|
|
RemoveTCBFromConn(CurTCB);
|
|
if (CurTCB->tcb_state != TCB_TIME_WAIT)
|
|
NotifyOfDisc(CurTCB, TDI_CONNECTION_ABORTED);
|
|
|
|
KeAcquireSpinLock(&CurTCB->tcb_lock, &Irql0);
|
|
DerefTCB(CurTCB, Irql0);
|
|
KeAcquireSpinLock(&TCBTableLock, &Irql0);
|
|
} else
|
|
KeReleaseSpinLock(&CurTCB->tcb_lock, Irql1);
|
|
|
|
CurTCB = FindNextTCB(i, CurTCB);
|
|
} else {
|
|
CurTCB = CurTCB->tcb_next;
|
|
}
|
|
}
|
|
}
|
|
|
|
KeReleaseSpinLock(&TCBTableLock, Irql0);
|
|
}
|
|
|
|
//* FindTCB - Find a TCB in the tcb table.
|
|
//
|
|
// Called when we need to find a TCB in the TCB table. We take a quick
|
|
// look at the last TCB we found, and if it matches we return it. Otherwise
|
|
// we hash into the TCB table and look for it. We assume the TCB table lock
|
|
// is held when we are called.
|
|
//
|
|
TCB * // Returns: Pointer to TCB found, or NULL if none.
|
|
FindTCB(
|
|
IPv6Addr *Src, // Source IP address of TCB to be found.
|
|
IPv6Addr *Dest, // Destination IP address of TCB to be found.
|
|
uint SrcScopeId, // Source address scope identifier.
|
|
uint DestScopeId, // Destination address scope identifier.
|
|
ushort SrcPort, // Source port of TCB to be found.
|
|
ushort DestPort) // Destination port of TCB to be found.
|
|
{
|
|
TCB *FoundTCB;
|
|
|
|
if (LastTCB != NULL) {
|
|
CHECK_STRUCT(LastTCB, tcb);
|
|
if (IP6_ADDR_EQUAL(&LastTCB->tcb_daddr, Dest) &&
|
|
LastTCB->tcb_dscope_id == DestScopeId &&
|
|
LastTCB->tcb_dport == DestPort &&
|
|
IP6_ADDR_EQUAL(&LastTCB->tcb_saddr, Src) &&
|
|
LastTCB->tcb_sscope_id == SrcScopeId &&
|
|
LastTCB->tcb_sport == SrcPort)
|
|
return LastTCB;
|
|
}
|
|
|
|
//
|
|
// Didn't find it in our 1 element cache.
|
|
//
|
|
FoundTCB = TCBTable[TCB_HASH(*Dest, *Src, DestPort, SrcPort)];
|
|
while (FoundTCB != NULL) {
|
|
CHECK_STRUCT(FoundTCB, tcb);
|
|
if (IP6_ADDR_EQUAL(&FoundTCB->tcb_daddr, Dest) &&
|
|
FoundTCB->tcb_dscope_id == DestScopeId &&
|
|
FoundTCB->tcb_dport == DestPort &&
|
|
IP6_ADDR_EQUAL(&FoundTCB->tcb_saddr, Src) &&
|
|
FoundTCB->tcb_sscope_id == SrcScopeId &&
|
|
FoundTCB->tcb_sport == SrcPort) {
|
|
|
|
//
|
|
// Found it. Update the cache for next time, and return.
|
|
//
|
|
LastTCB = FoundTCB;
|
|
return FoundTCB;
|
|
} else
|
|
FoundTCB = FoundTCB->tcb_next;
|
|
}
|
|
|
|
return FoundTCB;
|
|
}
|
|
|
|
|
|
//* InsertTCB - Insert a TCB in the tcb table.
|
|
//
|
|
// This routine inserts a TCB in the TCB table. No locks need to be held
|
|
// when this routine is called. We insert TCBs in ascending address order.
|
|
// Before inserting we make sure that the TCB isn't already in the table.
|
|
//
|
|
uint // Returns: TRUE if we inserted, false if we didn't.
|
|
InsertTCB(
|
|
TCB *NewTCB) // TCB to be inserted.
|
|
{
|
|
uint TCBIndex;
|
|
KIRQL OldIrql;
|
|
TCB *PrevTCB, *CurrentTCB;
|
|
TCB *WhereToInsert;
|
|
|
|
ASSERT(NewTCB != NULL);
|
|
CHECK_STRUCT(NewTCB, tcb);
|
|
TCBIndex = TCB_HASH(NewTCB->tcb_daddr, NewTCB->tcb_saddr,
|
|
NewTCB->tcb_dport, NewTCB->tcb_sport);
|
|
|
|
KeAcquireSpinLock(&TCBTableLock, &OldIrql);
|
|
KeAcquireSpinLockAtDpcLevel(&NewTCB->tcb_lock);
|
|
|
|
//
|
|
// Find the proper place in the table to insert him. While
|
|
// we're walking we'll check to see if a dupe already exists.
|
|
// When we find the right place to insert, we'll remember it, and
|
|
// keep walking looking for a duplicate.
|
|
//
|
|
PrevTCB = CONTAINING_RECORD(&TCBTable[TCBIndex], TCB, tcb_next);
|
|
WhereToInsert = NULL;
|
|
|
|
while (PrevTCB->tcb_next != NULL) {
|
|
CurrentTCB = PrevTCB->tcb_next;
|
|
|
|
if (IP6_ADDR_EQUAL(&CurrentTCB->tcb_daddr, &NewTCB->tcb_daddr) &&
|
|
IP6_ADDR_EQUAL(&CurrentTCB->tcb_saddr, &NewTCB->tcb_saddr) &&
|
|
(CurrentTCB->tcb_sport == NewTCB->tcb_sport) &&
|
|
(CurrentTCB->tcb_dport == NewTCB->tcb_dport)) {
|
|
|
|
KeReleaseSpinLockFromDpcLevel(&NewTCB->tcb_lock);
|
|
KeReleaseSpinLock(&TCBTableLock, OldIrql);
|
|
return FALSE;
|
|
|
|
} else {
|
|
|
|
if (WhereToInsert == NULL && CurrentTCB > NewTCB) {
|
|
WhereToInsert = PrevTCB;
|
|
}
|
|
|
|
CHECK_STRUCT(PrevTCB->tcb_next, tcb);
|
|
PrevTCB = PrevTCB->tcb_next;
|
|
}
|
|
}
|
|
|
|
if (WhereToInsert == NULL) {
|
|
WhereToInsert = PrevTCB;
|
|
}
|
|
|
|
NewTCB->tcb_next = WhereToInsert->tcb_next;
|
|
WhereToInsert->tcb_next = NewTCB;
|
|
NewTCB->tcb_flags |= IN_TCB_TABLE;
|
|
TStats.ts_numconns++;
|
|
|
|
KeReleaseSpinLockFromDpcLevel(&NewTCB->tcb_lock);
|
|
KeReleaseSpinLock(&TCBTableLock, OldIrql);
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
//* RemoveTCB - Remove a TCB from the tcb table.
|
|
//
|
|
// Called when we need to remove a TCB from the TCB table. We assume the
|
|
// TCB table lock and the TCB lock are held when we are called. If the
|
|
// TCB isn't in the table we won't try to remove him.
|
|
//
|
|
uint // Returns: TRUE if it's OK to free it, FALSE otherwise.
|
|
RemoveTCB(
|
|
TCB *RemovedTCB) // TCB to be removed.
|
|
{
|
|
uint TCBIndex;
|
|
TCB *PrevTCB;
|
|
#if DBG
|
|
uint Found = FALSE;
|
|
#endif
|
|
|
|
CHECK_STRUCT(RemovedTCB, tcb);
|
|
|
|
if (RemovedTCB->tcb_flags & IN_TCB_TABLE) {
|
|
TCBIndex = TCB_HASH(RemovedTCB->tcb_daddr, RemovedTCB->tcb_saddr,
|
|
RemovedTCB->tcb_dport, RemovedTCB->tcb_sport);
|
|
|
|
PrevTCB = CONTAINING_RECORD(&TCBTable[TCBIndex], TCB, tcb_next);
|
|
|
|
do {
|
|
if (PrevTCB->tcb_next == RemovedTCB) {
|
|
// Found him.
|
|
PrevTCB->tcb_next = RemovedTCB->tcb_next;
|
|
RemovedTCB->tcb_flags &= ~IN_TCB_TABLE;
|
|
TStats.ts_numconns--;
|
|
#if DBG
|
|
Found = TRUE;
|
|
#endif
|
|
break;
|
|
}
|
|
PrevTCB = PrevTCB->tcb_next;
|
|
#if DBG
|
|
if (PrevTCB != NULL)
|
|
CHECK_STRUCT(PrevTCB, tcb);
|
|
#endif
|
|
} while (PrevTCB != NULL);
|
|
|
|
ASSERT(Found);
|
|
}
|
|
|
|
if (LastTCB == RemovedTCB)
|
|
LastTCB = NULL;
|
|
|
|
if (TCBWalkCount == 0) {
|
|
return TRUE;
|
|
} else {
|
|
RemovedTCB->tcb_walkcount = TCBWalkCount + 1;
|
|
*(TCB **)&RemovedTCB->tcb_delayq.q_next = PendingFreeList;
|
|
PendingFreeList = RemovedTCB;
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
|
|
//* ScavengeTCB - Scavenge a TCB that's in the TIME_WAIT state.
|
|
//
|
|
// Called when we're running low on TCBs, and need to scavenge one from
|
|
// TIME_WAIT state. We'll walk through the TCB table, looking for the oldest
|
|
// TCB in TIME_WAIT. We'll remove and return a pointer to that TCB. If we
|
|
// don't find any TCBs in TIME_WAIT, we'll return NULL.
|
|
//
|
|
TCB * // Returns: Pointer to a reusable TCB, or NULL.
|
|
ScavengeTCB(
|
|
void)
|
|
{
|
|
KIRQL Irql0, Irql1, IrqlSave;
|
|
uint Now = SystemUpTime();
|
|
uint Delta = 0;
|
|
uint i;
|
|
TCB *FoundTCB = NULL, *PrevFound;
|
|
TCB *CurrentTCB, *PrevTCB;
|
|
|
|
KeAcquireSpinLock(&TCBTableLock, &Irql0);
|
|
|
|
if (TCBWalkCount != 0) {
|
|
KeReleaseSpinLock(&TCBTableLock, Irql0);
|
|
return NULL;
|
|
}
|
|
|
|
for (i = 0; i < TcbTableSize; i++) {
|
|
|
|
PrevTCB = CONTAINING_RECORD(&TCBTable[i], TCB, tcb_next);
|
|
CurrentTCB = PrevTCB->tcb_next;
|
|
|
|
while (CurrentTCB != NULL) {
|
|
CHECK_STRUCT(CurrentTCB, tcb);
|
|
|
|
KeAcquireSpinLock(&CurrentTCB->tcb_lock, &Irql1);
|
|
if (CurrentTCB->tcb_state == TCB_TIME_WAIT &&
|
|
(CurrentTCB->tcb_refcnt == 0) && !CLOSING(CurrentTCB)){
|
|
if (FoundTCB == NULL ||
|
|
((Now - CurrentTCB->tcb_alive) > Delta)) {
|
|
//
|
|
// Found a new 'older' TCB. If we already have one, free
|
|
// the lock on him and get the lock on the new one.
|
|
//
|
|
if (FoundTCB != NULL)
|
|
KeReleaseSpinLock(&FoundTCB->tcb_lock, Irql1);
|
|
else
|
|
IrqlSave = Irql1;
|
|
|
|
PrevFound = PrevTCB;
|
|
FoundTCB = CurrentTCB;
|
|
Delta = Now - FoundTCB->tcb_alive;
|
|
} else
|
|
KeReleaseSpinLock(&CurrentTCB->tcb_lock, Irql1);
|
|
} else
|
|
KeReleaseSpinLock(&CurrentTCB->tcb_lock, Irql1);
|
|
|
|
//
|
|
// Look at the next one.
|
|
//
|
|
PrevTCB = CurrentTCB;
|
|
CurrentTCB = PrevTCB->tcb_next;
|
|
}
|
|
}
|
|
|
|
//
|
|
// If we have one, pull him from the list.
|
|
//
|
|
if (FoundTCB != NULL) {
|
|
PrevFound->tcb_next = FoundTCB->tcb_next;
|
|
FoundTCB->tcb_flags &= ~IN_TCB_TABLE;
|
|
|
|
//
|
|
// REVIEW: Is the right place to drop the reference on our RCE?
|
|
// REVIEW: IPv4 called down to IP to close the RCE here.
|
|
//
|
|
if (FoundTCB->tcb_rce != NULL)
|
|
ReleaseRCE(FoundTCB->tcb_rce);
|
|
|
|
TStats.ts_numconns--;
|
|
if (LastTCB == FoundTCB) {
|
|
LastTCB = NULL;
|
|
}
|
|
KeReleaseSpinLock(&FoundTCB->tcb_lock, IrqlSave);
|
|
}
|
|
|
|
KeReleaseSpinLock(&TCBTableLock, Irql0);
|
|
return FoundTCB;
|
|
}
|
|
|
|
|
|
//* AllocTCB - Allocate a TCB.
|
|
//
|
|
// Called whenever we need to allocate a TCB. We try to pull one off the
|
|
// free list, or allocate one if we need one. We then initialize it, etc.
|
|
//
|
|
TCB * // Returns: Pointer to the new TCB, or NULL if we couldn't get one.
|
|
AllocTCB(
|
|
void)
|
|
{
|
|
TCB *NewTCB;
|
|
|
|
//
|
|
// First, see if we have one on the free list.
|
|
//
|
|
PSLIST_ENTRY BufferLink;
|
|
|
|
BufferLink = ExInterlockedPopEntrySList(&FreeTCBList, &FreeTCBListLock);
|
|
|
|
if (BufferLink != NULL) {
|
|
NewTCB = CONTAINING_RECORD(BufferLink, TCB, tcb_next);
|
|
CHECK_STRUCT(NewTCB, tcb);
|
|
ExInterlockedAddUlong(&FreeTCBs, -1, &FreeTCBListLock);
|
|
} else {
|
|
//
|
|
// We have none on the free list. If the total number of TCBs
|
|
// outstanding is more than we like to keep on the free list, try
|
|
// to scavenge a TCB from time wait.
|
|
//
|
|
if (CurrentTCBs < MaxFreeTCBs || ((NewTCB = ScavengeTCB()) == NULL)) {
|
|
if (CurrentTCBs < MaxTCBs) {
|
|
NewTCB = ExAllocatePool(NonPagedPool, sizeof(TCB));
|
|
if (NewTCB == NULL) {
|
|
return NewTCB;
|
|
} else {
|
|
ExInterlockedAddUlong(&CurrentTCBs, 1, &FreeTCBListLock);
|
|
}
|
|
} else
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
ASSERT(NewTCB != NULL);
|
|
|
|
RtlZeroMemory(NewTCB, sizeof(TCB));
|
|
#if DBG
|
|
NewTCB->tcb_sig = tcb_signature;
|
|
#endif
|
|
INITQ(&NewTCB->tcb_sendq);
|
|
NewTCB->tcb_cursend = NULL;
|
|
NewTCB->tcb_alive = TCPTime;
|
|
NewTCB->tcb_hops = -1;
|
|
|
|
//
|
|
// Initially we're not on the fast path because we're not established. Set
|
|
// the slowcount to one and set up the fastchk fields so we don't take the
|
|
// fast path.
|
|
//
|
|
NewTCB->tcb_slowcount = 1;
|
|
NewTCB->tcb_fastchk = TCP_FLAG_ACK | TCP_FLAG_SLOW;
|
|
KeInitializeSpinLock(&NewTCB->tcb_lock);
|
|
|
|
return NewTCB;
|
|
}
|
|
|
|
|
|
//* FreeTCB - Free a TCB.
|
|
//
|
|
// Called whenever we need to free a TCB.
|
|
//
|
|
// Note: This routine may be called with the TCBTableLock held.
|
|
//
|
|
void // Returns: Nothing.
|
|
FreeTCB(
|
|
TCB *FreedTCB) // TCB to be freed.
|
|
{
|
|
PSLIST_ENTRY BufferLink;
|
|
|
|
CHECK_STRUCT(FreedTCB, tcb);
|
|
|
|
#if defined(_WIN64)
|
|
if (CurrentTCBs > 2 * MaxFreeTCBs) {
|
|
|
|
#else
|
|
if ((CurrentTCBs > 2 * MaxFreeTCBs) || (FreeTCBList.Depth > 65000)) {
|
|
|
|
#endif
|
|
ExInterlockedAddUlong(&CurrentTCBs, (ulong) - 1, &FreeTCBListLock);
|
|
ExFreePool(FreedTCB);
|
|
return;
|
|
}
|
|
|
|
BufferLink = CONTAINING_RECORD(&(FreedTCB->tcb_next),
|
|
SLIST_ENTRY, Next);
|
|
ExInterlockedPushEntrySList(&FreeTCBList, BufferLink, &FreeTCBListLock);
|
|
ExInterlockedAddUlong(&FreeTCBs, 1, &FreeTCBListLock);
|
|
}
|
|
|
|
|
|
#pragma BEGIN_INIT
|
|
|
|
//* InitTCB - Initialize our TCB code.
|
|
//
|
|
// Called during init time to initialize our TCB code. We initialize
|
|
// the TCB table, etc, then return.
|
|
//
|
|
int // Returns: TRUE if we did initialize, false if we didn't.
|
|
InitTCB(
|
|
void)
|
|
{
|
|
LARGE_INTEGER InitialWakeUp;
|
|
uint i;
|
|
|
|
TCBTable = ExAllocatePool(NonPagedPool, TcbTableSize * sizeof(TCB*));
|
|
if (TCBTable == NULL) {
|
|
return FALSE;
|
|
}
|
|
|
|
for (i = 0; i < TcbTableSize; i++)
|
|
TCBTable[i] = NULL;
|
|
|
|
LastTCB = NULL;
|
|
|
|
ExInitializeSListHead(&FreeTCBList);
|
|
|
|
KeInitializeSpinLock(&TCBTableLock);
|
|
KeInitializeSpinLock(&FreeTCBListLock);
|
|
|
|
TCPTime = 0;
|
|
TCBWalkCount = 0;
|
|
DeadmanTicks = NUM_DEADMAN_TICKS;
|
|
|
|
//
|
|
// Set up our timer to call TCBTimeout once every MS_PER_TICK milliseconds.
|
|
//
|
|
// REVIEW: Switch this to be driven off the IPv6Timeout routine instead
|
|
// REVIEW: of having two independent timers?
|
|
//
|
|
KeInitializeDpc(&TCBTimeoutDpc, TCBTimeout, NULL);
|
|
KeInitializeTimer(&TCBTimer);
|
|
InitialWakeUp.QuadPart = -(LONGLONG) MS_PER_TICK * 10000;
|
|
KeSetTimerEx(&TCBTimer, InitialWakeUp, MS_PER_TICK, &TCBTimeoutDpc);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
#pragma END_INIT
|
|
|
|
|
|
//* UnloadTCB
|
|
//
|
|
// Called during shutdown to uninitialize
|
|
// in preparation for unloading the stack.
|
|
//
|
|
// There are no open sockets (or else we wouldn't be unloading).
|
|
// Because UnloadTCPSend has already been called,
|
|
// we are no longer receiving packets from the IPv6 layer.
|
|
//
|
|
void
|
|
UnloadTCB(void)
|
|
{
|
|
PSLIST_ENTRY BufferLink;
|
|
TCB *CurrentTCB;
|
|
uint i;
|
|
KIRQL OldIrql;
|
|
|
|
//
|
|
// First stop TCBTimeout from being called.
|
|
//
|
|
KeCancelTimer(&TCBTimer);
|
|
|
|
//
|
|
// Traverse the buckets looking for TCBs.
|
|
// REVIEW - Can we have TCBs in states other than time-wait?
|
|
//
|
|
for (i = 0; i < TcbTableSize; i++) {
|
|
|
|
while ((CurrentTCB = TCBTable[i]) != NULL) {
|
|
|
|
KeAcquireSpinLock(&CurrentTCB->tcb_lock, &OldIrql);
|
|
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_STATE,
|
|
"UnloadTCB(%p): state %x flags %x refs %x "
|
|
"reason %x pend %x walk %x\n",
|
|
CurrentTCB,
|
|
CurrentTCB->tcb_state,
|
|
CurrentTCB->tcb_flags,
|
|
CurrentTCB->tcb_refcnt,
|
|
CurrentTCB->tcb_closereason,
|
|
CurrentTCB->tcb_pending,
|
|
CurrentTCB->tcb_walkcount));
|
|
|
|
CurrentTCB->tcb_flags |= NEED_RST;
|
|
TryToCloseTCB(CurrentTCB, TCB_CLOSE_ABORTED, OldIrql);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Now pull TCBs off the free list and really free them.
|
|
//
|
|
while ((BufferLink = ExInterlockedPopEntrySList(&FreeTCBList, &FreeTCBListLock)) != NULL) {
|
|
CurrentTCB = CONTAINING_RECORD(BufferLink, TCB, tcb_next);
|
|
CHECK_STRUCT(CurrentTCB, tcb);
|
|
|
|
ExFreePool(CurrentTCB);
|
|
}
|
|
|
|
ExFreePool(TCBTable);
|
|
TCBTable = NULL;
|
|
}
|
|
|
|
//* CleanupTCBWithIF
|
|
//
|
|
// Helper function for TCBWalk, to remove
|
|
// TCBs that reference the specified interface.
|
|
//
|
|
// Returns FALSE if CheckTCB should be deleted, TRUE otherwise.
|
|
//
|
|
uint
|
|
CleanupTCBWithIF(
|
|
TCB *CheckTCB,
|
|
void *Context1,
|
|
void *Context2,
|
|
void *Context3)
|
|
{
|
|
Interface *IF = (Interface *) Context1;
|
|
RouteCacheEntry *RCE;
|
|
KIRQL OldIrql;
|
|
|
|
CHECK_STRUCT(CheckTCB, tcb);
|
|
|
|
RCE = CheckTCB->tcb_rce;
|
|
if (RCE != NULL) {
|
|
ASSERT(RCE->NTE->IF == RCE->NCE->IF);
|
|
|
|
if (RCE->NTE->IF == IF)
|
|
return FALSE; // Delete this TCB.
|
|
}
|
|
|
|
return TRUE; // Do not delete this TCB.
|
|
}
|
|
|
|
//* TCPRemoveIF
|
|
//
|
|
// Remove TCP's references to the specified interface.
|
|
//
|
|
void
|
|
TCPRemoveIF(Interface *IF)
|
|
{
|
|
//
|
|
// Currently, only TCBs hold onto references.
|
|
//
|
|
TCBWalk(CleanupTCBWithIF, IF, NULL, NULL);
|
|
}
|