787 lines
24 KiB
C
787 lines
24 KiB
C
/*++
|
||
|
||
Copyright (c) 1996-1997 Microsoft Corporation
|
||
|
||
Module Name:
|
||
|
||
resfail.c
|
||
|
||
Abstract:
|
||
|
||
Cluster resource state management routines.
|
||
|
||
Author:
|
||
|
||
Mike Massa (mikemas) 14-Jan-1996
|
||
|
||
|
||
Revision History:
|
||
|
||
--*/
|
||
|
||
#include "fmp.h"
|
||
|
||
#define LOG_MODULE RESFAIL
|
||
|
||
// globals
|
||
|
||
//
|
||
// Local Functions
|
||
//
|
||
|
||
DWORD
|
||
FmpHandleResStateChangeProc(
|
||
IN LPVOID pContext
|
||
);
|
||
|
||
|
||
VOID
|
||
FmpHandleResourceFailure(
|
||
IN PFM_RESOURCE pResource
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Handles resource failure notifications from the resource monitor.
|
||
|
||
Arguments:
|
||
|
||
Resource - The resource which has failed.
|
||
|
||
Return Value:
|
||
|
||
None.
|
||
|
||
Note:
|
||
|
||
This routine is only called if the resource was online at the time of
|
||
the failure.
|
||
|
||
--*/
|
||
{
|
||
DWORD dwStatus;
|
||
BOOL bRestartGroup = TRUE;
|
||
DWORD tickCount;
|
||
DWORD withinFailurePeriod;
|
||
|
||
CsLogEvent1(LOG_CRITICAL,
|
||
FM_RESOURCE_FAILURE,
|
||
OmObjectName(pResource) );
|
||
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] FmpHandleResourceFailure: taking resource %1!ws! and dependents offline\n",
|
||
OmObjectId(pResource));
|
||
|
||
|
||
|
||
if ( pResource->State == ClusterResourceOnline )
|
||
{
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] Resource %1!ws! failed, but still online!\n",
|
||
OmObjectId(pResource));
|
||
}
|
||
|
||
|
||
dwStatus = FmpTerminateResource(pResource);
|
||
|
||
if (dwStatus != NO_ERROR)
|
||
{
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] FmpHandleResourceFailure: offline of resource %1!ws! failed\n",
|
||
OmObjectId(pResource));
|
||
}
|
||
|
||
//
|
||
// If system shutdown has begun, then don't bother trying to restart anything.
|
||
// We can see spurious failures during shutdown as the network goes away, but
|
||
// we do not want to be restarting resources while FmShutdownGroups is trying
|
||
// to take them offline!
|
||
//
|
||
if (FmpShutdown)
|
||
{
|
||
return;
|
||
}
|
||
// SS: We handle the failure of the quorum resource specially
|
||
// since other resources rely on it and may be blocked waiting
|
||
// for the quorum resource to come online.
|
||
|
||
++ pResource->NumberOfFailures;
|
||
switch ( pResource->RestartAction )
|
||
{
|
||
|
||
case RestartNot:
|
||
// Don't do anything.
|
||
// However, if this is a quorum resource cause it to halt
|
||
if (pResource->QuorumResource)
|
||
{
|
||
//cleanup quorum resource and cause the node to halt
|
||
if (pResource->RestartAction == RestartNot)
|
||
{
|
||
FmpCleanupQuorumResource(pResource);
|
||
CsInconsistencyHalt(ERROR_QUORUM_RESOURCE_ONLINE_FAILED);
|
||
}
|
||
}
|
||
|
||
break;
|
||
|
||
|
||
case RestartLocal:
|
||
// fall through is correct for this case
|
||
bRestartGroup = FALSE;
|
||
case RestartGroup:
|
||
//
|
||
// If the number of failures is too high, then don't restart locally.
|
||
// If this was a local restart then don't notify FM so that Group
|
||
// doesn't move because of this guy; otherwise notify the FM that the
|
||
// group has failed.
|
||
//
|
||
//
|
||
// Get our current time, in milliseconds.
|
||
//
|
||
tickCount = GetTickCount();
|
||
|
||
//
|
||
// Compute a boolean that tells if we are withing the allotted
|
||
// failure period.
|
||
//
|
||
withinFailurePeriod = ( ((tickCount - pResource->FailureTime) <=
|
||
pResource->RestartPeriod) ? TRUE : FALSE);
|
||
|
||
//
|
||
// If it's been a long time since our last failure, then
|
||
// get the current time of this failure, and reset the count
|
||
// of failures.
|
||
//
|
||
if ( !withinFailurePeriod ) {
|
||
pResource->FailureTime = tickCount;
|
||
pResource->NumberOfFailures = 1;
|
||
}
|
||
if ( pResource->NumberOfFailures <= pResource->RestartThreshold )
|
||
{
|
||
FmpRestartResourceTree( pResource );
|
||
}
|
||
|
||
else if ( bRestartGroup )
|
||
{
|
||
ClusterEvent( CLUSTER_EVENT_GROUP_FAILED, pResource->Group );
|
||
}
|
||
else
|
||
{
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] RestartLocal: resource %1!ws! has exceeded its restart limit!\n",
|
||
OmObjectId(pResource));
|
||
if (pResource->QuorumResource)
|
||
{
|
||
FmpCleanupQuorumResource(pResource);
|
||
CsInconsistencyHalt(ERROR_QUORUM_RESOURCE_ONLINE_FAILED);
|
||
}
|
||
// Start a timer for which will attempt to restart the resource later
|
||
FmpDelayedStartRes(pResource);
|
||
}
|
||
|
||
break;
|
||
|
||
default:
|
||
ClRtlLogPrint(LOG_NOISE,"[FM] FmpHandleResourceFailure: unknown restart action! Value = %1!u!\n",
|
||
pResource->RestartAction);
|
||
|
||
}
|
||
|
||
return;
|
||
|
||
} // FmpHandleResourceFailure
|
||
|
||
|
||
|
||
VOID
|
||
FmpHandleResourceTransition(
|
||
IN PFM_RESOURCE Resource,
|
||
IN CLUSTER_RESOURCE_STATE NewState
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Takes appropriate action based on resource state transitions indicated
|
||
by the Resource Monitor.
|
||
|
||
Arguments:
|
||
|
||
Resource - The resource which has transitioned.
|
||
|
||
NewState - The new state of Resource.
|
||
|
||
Return Value:
|
||
|
||
None.
|
||
|
||
--*/
|
||
|
||
{
|
||
DWORD status;
|
||
DWORD dwOldBlockingFlag;
|
||
|
||
ChkFMState:
|
||
ACQUIRE_SHARED_LOCK(gQuoChangeLock);
|
||
if (!FmpFMGroupsInited)
|
||
{
|
||
DWORD dwRetryCount = 50;
|
||
|
||
|
||
//FmFormNewClusterPhaseProcessing is in progress
|
||
if (FmpFMFormPhaseProcessing)
|
||
{
|
||
ClRtlLogPrint(LOG_CRITICAL,
|
||
"[FM] FmpHandleResourceTransition: resource notification from quorum resource "
|
||
"during phase processing. Sleep and retry\n");
|
||
RELEASE_LOCK(gQuoChangeLock);
|
||
Sleep(500);
|
||
if (dwRetryCount--)
|
||
goto ChkFMState;
|
||
else
|
||
{
|
||
ClRtlLogPrint(LOG_CRITICAL,
|
||
"[FM] FmpHandleResourceTransition: waited for too long\n");
|
||
//terminate the process
|
||
CL_ASSERT(FALSE);
|
||
CsInconsistencyHalt(ERROR_CLUSTER_NODE_DOWN);
|
||
}
|
||
}
|
||
//this can only come from the quorum resource
|
||
CL_ASSERT(Resource->QuorumResource);
|
||
}
|
||
|
||
// if this is from the quorum resource, we need to do some special handling
|
||
// protect the check for quorum resource by acquiring the shared lock
|
||
|
||
if (Resource->QuorumResource)
|
||
{
|
||
//
|
||
// Chittur Subbaraman (chitturs) - 6/25/99
|
||
//
|
||
// Handle the sync notifications for the quorum resource. This is
|
||
// done here instead of in FmpRmDoInterlockedDecrement since we
|
||
// need to hold the gQuoChangeLock for this to synchronize with
|
||
// other threads such as the FmCheckQuorumState called by the DM
|
||
// node down handler. Note that FmpRmDoInterLockedDecrement needs
|
||
// to be done with NO LOCKS held since it easily runs into deadlock
|
||
// situations in which the quorum resource offline is waiting to
|
||
// have the blocking resources count go to 0 and FmpRmDoInterLockedDecrement
|
||
// which alone can make this count to 0 could be stuck waiting for
|
||
// the lock.
|
||
//
|
||
DWORD dwBlockingFlag = InterlockedExchange( &Resource->BlockingQuorum, 0 );
|
||
|
||
CL_ASSERT( dwBlockingFlag == FALSE );
|
||
|
||
FmpCallResourceNotifyCb( Resource, NewState );
|
||
|
||
ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
|
||
|
||
}
|
||
else
|
||
{
|
||
FmpAcquireLocalResourceLock(Resource);
|
||
}
|
||
|
||
|
||
ClRtlLogPrint(
|
||
NewState == ClusterResourceFailed ? LOG_UNUSUAL : LOG_NOISE,
|
||
"[FM] HandleResourceTransition: Resource Name = %1!ws! old state=%2!u! new state=%3!u!\r\n",
|
||
OmObjectId(Resource),
|
||
Resource->State,
|
||
NewState
|
||
);
|
||
|
||
if ( Resource->State == NewState )
|
||
{
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] HandleResourceTransition: Resource %1!ws! already in state=%2!u!\r\n",
|
||
OmObjectId(Resource),
|
||
NewState );
|
||
goto FnExit;
|
||
}
|
||
|
||
switch (Resource->State) {
|
||
|
||
case ClusterResourceOnline:
|
||
// if there is a resource failure, then let the worker thread handle it
|
||
// if there is a state change call the resource state change handler
|
||
if (Resource->State != NewState)
|
||
FmpPropagateResourceState( Resource, NewState );
|
||
if (NewState == ClusterResourceFailed)
|
||
{
|
||
if (Resource->QuorumResource)
|
||
{
|
||
RELEASE_LOCK(gQuoLock);
|
||
|
||
FmpProcessResourceEvents(Resource, ClusterResourceFailed,
|
||
ClusterResourceOnline);
|
||
ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
|
||
|
||
}
|
||
else
|
||
{
|
||
FmpProcessResourceEvents(Resource, ClusterResourceFailed,
|
||
ClusterResourceOnline);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
CL_ASSERT( (NewState == ClusterResourceOnline) ||
|
||
(NewState == ClusterResourceOffline) );
|
||
}
|
||
break;
|
||
|
||
|
||
case ClusterResourceFailed:
|
||
if (Resource->State != NewState)
|
||
FmpPropagateResourceState( Resource, NewState );
|
||
break;
|
||
|
||
case ClusterResourceOfflinePending:
|
||
//SS: a resource cannot go from one pending state to another
|
||
CL_ASSERT( NewState < ClusterResourcePending )
|
||
// fall through
|
||
case ClusterResourceOffline:
|
||
//
|
||
// Because this resource is now unstuck... there may be other
|
||
// pending threads waiting to clear up. If not, they'll just get
|
||
// stuck again, until the next notification.
|
||
//
|
||
switch ( NewState ) {
|
||
|
||
case ClusterResourceFailed:
|
||
if ( Resource->State != NewState )
|
||
FmpPropagateResourceState( Resource, NewState );
|
||
|
||
// if it is the quorum resource handle the locking appropriately
|
||
if (Resource->QuorumResource)
|
||
{
|
||
|
||
//
|
||
// Chittur Subbaraman (chitturs) - 9/20/99
|
||
//
|
||
// Release and reacquire the gQuoLock to maintain
|
||
// locking order between group lock and gQuoLock.
|
||
//
|
||
RELEASE_LOCK(gQuoLock);
|
||
|
||
FmpProcessResourceEvents(Resource, ClusterResourceFailed,
|
||
ClusterResourceOffline);
|
||
|
||
ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
|
||
}
|
||
else
|
||
{
|
||
FmpProcessResourceEvents(Resource, ClusterResourceFailed,
|
||
ClusterResourceOffline);
|
||
}
|
||
break;
|
||
|
||
case ClusterResourceOffline:
|
||
if ( Resource->Group->OwnerNode == NmLocalNode )
|
||
{
|
||
if ( Resource->State != NewState )
|
||
{
|
||
FmpPropagateResourceState( Resource, NewState );
|
||
}
|
||
|
||
// if it is the quorum resource handle the locking appropriately
|
||
if (Resource->QuorumResource)
|
||
{
|
||
//
|
||
// Chittur Subbaraman (chitturs) - 9/20/99
|
||
//
|
||
// Release and reacquire the gQuoLock to maintain
|
||
// locking order between group lock and gQuoLock.
|
||
//
|
||
RELEASE_LOCK(gQuoLock);
|
||
|
||
FmpProcessResourceEvents(Resource, ClusterResourceOffline,
|
||
ClusterResourceOfflinePending);
|
||
|
||
ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
|
||
}
|
||
else
|
||
{
|
||
FmpProcessResourceEvents(Resource, ClusterResourceOffline,
|
||
ClusterResourceOfflinePending);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
if ( Resource->State != NewState )
|
||
{
|
||
FmpPropagateResourceState( Resource, NewState );
|
||
}
|
||
}
|
||
break;
|
||
|
||
default:
|
||
if ( Resource->State != NewState ) {
|
||
FmpPropagateResourceState( Resource, NewState );
|
||
}
|
||
break;
|
||
|
||
}
|
||
break;
|
||
|
||
case ClusterResourceOnlinePending:
|
||
//SS: a resource cannot go from one pending state to another
|
||
CL_ASSERT( NewState < ClusterResourcePending )
|
||
|
||
//
|
||
// Because this resource is now unstuck... there may be other
|
||
// pending threads waiting to clear up. If not, they'll just get
|
||
// stuck again, until the next notification.
|
||
//
|
||
|
||
switch ( NewState ) {
|
||
|
||
case ClusterResourceFailed:
|
||
//
|
||
// Make sure we go through full failure recovery.
|
||
//
|
||
//SS: dont know why the state is being set to online
|
||
//it could be online pending
|
||
//Resource->State = ClusterResourceOnline;
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] HandleResourceTransition: Resource failed, post a work item\r\n");
|
||
if (Resource->State != NewState)
|
||
FmpPropagateResourceState( Resource, NewState );
|
||
|
||
// since this is the quorum Resource handle locking appropriately
|
||
if (Resource->QuorumResource)
|
||
{
|
||
|
||
//
|
||
// Chittur Subbaraman (chitturs) - 9/20/99
|
||
//
|
||
// Release and reacquire the gQuoLock to maintain
|
||
// locking order between group lock and gQuoLock.
|
||
//
|
||
RELEASE_LOCK(gQuoLock);
|
||
|
||
FmpProcessResourceEvents(Resource, ClusterResourceFailed,
|
||
ClusterResourceOnlinePending);
|
||
|
||
ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
|
||
}
|
||
else
|
||
{
|
||
FmpProcessResourceEvents(Resource, ClusterResourceFailed,
|
||
ClusterResourceOnlinePending);
|
||
|
||
}
|
||
break;
|
||
|
||
case ClusterResourceOnline:
|
||
if (Resource->Group->OwnerNode == NmLocalNode) {
|
||
//Call FmpPropagateResourceState without holding the group
|
||
//lock for the quorum resource
|
||
FmpPropagateResourceState( Resource, NewState );
|
||
|
||
// since this is the quorum Resource fork another thread
|
||
if (Resource->QuorumResource)
|
||
{
|
||
//
|
||
// Chittur Subbaraman (chitturs) - 9/20/99
|
||
//
|
||
// Release and reacquire the gQuoLock to maintain
|
||
// locking order between group lock and gQuoLock.
|
||
//
|
||
RELEASE_LOCK(gQuoLock);
|
||
|
||
FmpProcessResourceEvents(Resource, ClusterResourceOnline,
|
||
ClusterResourceOnlinePending);
|
||
|
||
ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
|
||
}
|
||
else
|
||
{
|
||
FmpProcessResourceEvents(Resource, ClusterResourceOnline,
|
||
ClusterResourceOnlinePending);
|
||
}
|
||
} else {
|
||
FmpPropagateResourceState( Resource, NewState );
|
||
}
|
||
break;
|
||
|
||
default:
|
||
if (Resource->State != NewState)
|
||
FmpPropagateResourceState( Resource, NewState );
|
||
break;
|
||
}
|
||
|
||
Resource->Flags &= ~RESOURCE_WAITING;
|
||
break;
|
||
|
||
case ClusterResourceInitializing:
|
||
default:
|
||
if (Resource->State != NewState)
|
||
FmpPropagateResourceState( Resource, NewState );
|
||
CL_ASSERT(Resource->State == NewState);
|
||
}
|
||
|
||
FnExit:
|
||
|
||
if (Resource->QuorumResource) {
|
||
RELEASE_LOCK(gQuoLock);
|
||
} else {
|
||
FmpReleaseLocalResourceLock(Resource);
|
||
}
|
||
|
||
RELEASE_LOCK(gQuoChangeLock);
|
||
|
||
return;
|
||
}
|
||
|
||
|
||
/****
|
||
@func DWORD | FmpCreateResNotificationHandler| This creates a new
|
||
thread to handle state change notifications for the given resource.
|
||
|
||
@parm IN PFM_RESOURCE | pResource | Pointer to the resource.
|
||
@parm IN CLUSTER_RESOURCE_STATE | OldState | The old state of the
|
||
resource from which it transitioned.
|
||
@parm IN CLUSTER_RESOURCE_STATE | NewState | The new state of the
|
||
resource.
|
||
|
||
@comm This routine creates a thread to perform all the pending work
|
||
when the resource changes state that cannot be performed within
|
||
FmpHandleResourceTransition to avoid deadlocks and that cannot
|
||
be deffered to the FmpWorkerThread because of serialization issues.
|
||
In particular, it is used to handle state transition work for the
|
||
quorum resource since other resources depend on the quorum resource
|
||
and cannot come online till the state of the quorum becomes online.
|
||
For instance, the quorum resource may be coming offline as a part
|
||
of move while another resource if in FmpWorkerThread() calling
|
||
FmpOffline/OnlineWaitingTree(). For the quorum resource to come
|
||
online again (that happens by signalling the move pending thread)
|
||
so that FmpWorkerThread can make progress its events will have
|
||
to be handled separately.
|
||
|
||
@rdesc Returns a result code. ERROR_SUCCESS on success.
|
||
|
||
@xref <f FmpHandleResStateChangeProc>
|
||
|
||
****/
|
||
DWORD FmpCreateResStateChangeHandler(
|
||
IN PFM_RESOURCE pResource,
|
||
IN CLUSTER_RESOURCE_STATE NewState,
|
||
IN CLUSTER_RESOURCE_STATE OldState)
|
||
{
|
||
|
||
HANDLE hThread = NULL;
|
||
DWORD dwThreadId;
|
||
PRESOURCE_STATE_CHANGE pResStateContext = NULL;
|
||
DWORD dwStatus = ERROR_SUCCESS;
|
||
|
||
//reference the resource
|
||
//the thread will dereference it, if the thread is successfully
|
||
//created
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] FmpCreateResStateChangeHandler: Entry\r\n");
|
||
|
||
OmReferenceObject(pResource);
|
||
|
||
pResStateContext = LocalAlloc(LMEM_FIXED, sizeof(RESOURCE_STATE_CHANGE));
|
||
|
||
if (!pResStateContext)
|
||
{
|
||
|
||
dwStatus = GetLastError();
|
||
CL_UNEXPECTED_ERROR(dwStatus);
|
||
goto FnExit;
|
||
}
|
||
|
||
|
||
pResStateContext->pResource = pResource;
|
||
pResStateContext->OldState = OldState;
|
||
pResStateContext->NewState = NewState;
|
||
|
||
|
||
hThread = CreateThread( NULL, 0, FmpHandleResStateChangeProc,
|
||
pResStateContext, 0, &dwThreadId );
|
||
|
||
if ( hThread == NULL )
|
||
{
|
||
dwStatus = GetLastError();
|
||
CL_UNEXPECTED_ERROR(dwStatus);
|
||
// if the function failed to create the thread, cleanup the
|
||
// state that the thread would have cleaned
|
||
//deref the object if the thread is not created successfully
|
||
OmDereferenceObject(pResource);
|
||
LocalFree(pResStateContext);
|
||
goto FnExit;
|
||
}
|
||
|
||
FnExit:
|
||
//do general cleanup
|
||
if (hThread)
|
||
CloseHandle(hThread);
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] FmpCreateResStateChangeHandler: Exit, status %1!u!\r\n",
|
||
dwStatus);
|
||
return(dwStatus);
|
||
}
|
||
|
||
/****
|
||
@func DWORD | FmpHandleResStateChangeProc| This thread procedure
|
||
handles all the post processing for the resource transitions
|
||
for the quorum resource.
|
||
|
||
@parm IN LPVOID | pContext | A pointer to PRESOURCE_STATE_CHANGE
|
||
structure.
|
||
|
||
@comm This thread handles a resource change notification postprocessing.
|
||
Significantly for quorum resource so that quorum resource
|
||
state change notifications are not handled by the single
|
||
FmpWorkThread() [that causes deadlock - if the quorum
|
||
notification resource is queued behind a notification whose
|
||
handling requires tha quorum resource be online]..
|
||
|
||
@rdesc Returns a result code. ERROR_SUCCESS on success.
|
||
|
||
@xref <f FmpCreateResStateChangeHandler)
|
||
****/
|
||
DWORD
|
||
FmpHandleResStateChangeProc(
|
||
IN LPVOID pContext
|
||
)
|
||
{
|
||
PRESOURCE_STATE_CHANGE pResStateChange = pContext;
|
||
|
||
CL_ASSERT( pResStateChange );
|
||
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] FmpHandleResStateChangeProc: Entry...\r\n");
|
||
|
||
FmpHandleResourceTransition( pResStateChange->pResource,
|
||
pResStateChange->NewState );
|
||
|
||
OmDereferenceObject( pResStateChange->pResource );
|
||
|
||
LocalFree( pResStateChange );
|
||
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] FmpHandleResStateChangeProc: Exit...\r\n");
|
||
|
||
return( ERROR_SUCCESS );
|
||
}
|
||
|
||
|
||
DWORD
|
||
FmpDelayedStartRes(
|
||
IN PFM_RESOURCE pResource
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Starts a timer for the resource. FmpDelayedRestartCb function will be
|
||
invoked at the expiry of timer..
|
||
|
||
Arguments:
|
||
|
||
pResource - The resource which has transitioned.
|
||
|
||
|
||
Return Value:
|
||
ERROR_SUCCESS if successful, WIN32 errorcode otherwise.
|
||
|
||
Note that no delayed restart attempts are made if the resource is a quorum resource.
|
||
|
||
--*/
|
||
{
|
||
DWORD dwStatus = ERROR_SUCCESS;
|
||
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] FmpDelayedRestartRes:Entry for resource %1!ws!\n",
|
||
OmObjectId(pResource));
|
||
|
||
if( (pResource->RetryPeriodOnFailure != CLUSTER_RESOURCE_DEFAULT_RETRY_PERIOD_ON_FAILURE ) &&
|
||
!(pResource->QuorumResource) )
|
||
{
|
||
// Check if there is already a timer running for this resource
|
||
|
||
if(pResource->hTimer == NULL)
|
||
{
|
||
pResource->hTimer = CreateWaitableTimer(NULL, FALSE, NULL);
|
||
if (!(pResource->hTimer))
|
||
{
|
||
// not a fatal error but log it
|
||
ClRtlLogPrint(LOG_ERROR,
|
||
"[FM] FmpDelayedRestartRes: failed to create the watchdog timer for resource %1!ws!\n",
|
||
OmObjectId(pResource));
|
||
}
|
||
else{
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] FmpDelayedRestartRes: Adding watchdog timer for resource %1!ws!, period=%2!u!\n",
|
||
OmObjectId(pResource),
|
||
pResource->RetryPeriodOnFailure);
|
||
|
||
// make sure resource struct won't go away if resource is deleted before the timer fires
|
||
OmReferenceObject(pResource);
|
||
|
||
//register the timer with the periodic activity timer thread
|
||
dwStatus = AddTimerActivity(pResource->hTimer, pResource->RetryPeriodOnFailure, 0, FmpDelayedRestartCb, pResource);
|
||
|
||
if (dwStatus != ERROR_SUCCESS)
|
||
{
|
||
ClRtlLogPrint(LOG_ERROR,
|
||
"[FM] FmpDelayedRestartRes: AddTimerActivity failed with error %1!u!\n",
|
||
dwStatus);
|
||
CloseHandle(pResource->hTimer);
|
||
pResource->hTimer = NULL;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return dwStatus;
|
||
}
|
||
|
||
|
||
|
||
|
||
VOID
|
||
FmpDelayedRestartCb(
|
||
IN HANDLE hTimer,
|
||
IN PVOID pContext)
|
||
|
||
/*++
|
||
|
||
Routine Description
|
||
|
||
This is invoked by timer activity thread to attempt a restart on
|
||
a failed resource.
|
||
|
||
Arguments
|
||
pContext - a pointer to PFM_RESOURCE
|
||
|
||
Return Value
|
||
ERROR_SUCCESS on success, a WIN32 error code otherwise.
|
||
|
||
--*/
|
||
|
||
{
|
||
PFM_RESOURCE pResource;
|
||
|
||
pResource=(PFM_RESOURCE)pContext;
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[FM] FmpDelayedRestartCb: Entry for resource %1!ws! \n",
|
||
OmObjectId(pResource));
|
||
|
||
OmReferenceObject(pResource);
|
||
FmpPostWorkItem(FM_EVENT_RES_RETRY_TIMER,
|
||
pResource,
|
||
0);
|
||
OmDereferenceObject(pResource);
|
||
return;
|
||
}
|
||
|