/*++

Copyright (c) 1996-1999  Microsoft Corporation

Module Name:

    nminit.c

Abstract:

    Initialization, cluster join, and cluster form routines for the
    Node Manager.

Author:

    Mike Massa (mikemas)

Revision History:

    6/03/96   Created.

--*/

/*

General Implementation Notes:

    The functions DmBeginLocalUpdate, DmCommitLocalUpdate, and
    DmAbortLocalUpdate cannot be called while holding the NM lock, or
    a deadlock with the NmTimer thread may result during regroup when
    disk writes are stalled. These functions attempt to write to the
    quorum disk.

*/

#include "nmp.h"


//
// Data defined outside of this module
//
extern BOOL CsNoQuorum;


//
// Data exported by this module
//
HANDLE             NmClusnetHandle = NULL;


//
// Data private to the Node Manager
//
CRITICAL_SECTION   NmpLock;
NM_STATE           NmpState = NmStateOffline;
DWORD              NmpActiveThreadCount = 0;
HANDLE             NmpShutdownEvent = NULL;
CL_NODE_ID         NmpJoinerNodeId = ClusterInvalidNodeId;
CL_NODE_ID         NmpSponsorNodeId = ClusterInvalidNodeId;
DWORD              NmpJoinTimer = 0;
BOOLEAN            NmpJoinAbortPending = FALSE;
DWORD              NmpJoinSequence = 0;
BOOLEAN            NmpJoinerUp = FALSE;
BOOLEAN            NmpJoinBeginInProgress = FALSE;
BOOLEAN            NmpJoinerOutOfSynch = FALSE;
LPWSTR             NmpClusnetEndpoint = NULL;
WCHAR              NmpInvalidJoinerIdString[] = L"0";
CL_NODE_ID         NmpLeaderNodeId = ClusterInvalidNodeId;
BOOL               NmpCleanupIfJoinAborted = FALSE;
BOOL               NmpSuccessfulMMJoin = FALSE;
DWORD              NmpAddNodeId = ClusterInvalidNodeId;
LPWSTR             NmpClusterInstanceId = NULL;

//
// Version information defined outside of this module
//
extern DWORD CsMyHighestVersion;
extern DWORD CsMyLowestVersion;
extern DWORD CsClusterHighestVersion;
extern DWORD CsClusterLowestVersion;

//
// Dispatch table handed to GumReceiveUpdates for GumUpdateMembership.
// Each entry pairs a number with an update routine; the order of the
// entries is significant and must not be changed.
//
GUM_DISPATCH_ENTRY NmGumDispatchTable[] = {
    {1,                          NmpUpdateCreateNode},
    {1,                          NmpUpdatePauseNode},
    {1,                          NmpUpdateResumeNode},
    {1,                          NmpUpdateEvictNode},
    {4, (PGUM_DISPATCH_ROUTINE1) NmpUpdateCreateNetwork},
    {2, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetNetworkName},
    {1,                          NmpUpdateSetNetworkPriorityOrder},
    {3, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetNetworkCommonProperties},
    {2, (PGUM_DISPATCH_ROUTINE1) NmpUpdateCreateInterface},
    {2, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetInterfaceInfo},
{3, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetInterfaceCommonProperties}, {1, NmpUpdateDeleteInterface}, {3, (PGUM_DISPATCH_ROUTINE1) NmpUpdateJoinBegin}, {2, (PGUM_DISPATCH_ROUTINE1) NmpUpdateJoinAbort}, // // Version 2 (NT 5.0) extensions that are understood by NT4 SP4 // {5, (PGUM_DISPATCH_ROUTINE1) NmpUpdateJoinBegin2}, {4, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetNetworkAndInterfaceStates}, {2, (PGUM_DISPATCH_ROUTINE1) NmpUpdatePerformFixups}, {5, (PGUM_DISPATCH_ROUTINE1) NmpUpdatePerformFixups2}, // // Version 2 (NT 5.0) extensions that are not understood by NT4 SP4 // These may not be called in a mixed NT4/NT5 cluster. // {5, (PGUM_DISPATCH_ROUTINE1) NmpUpdateAddNode}, {2, (PGUM_DISPATCH_ROUTINE1) NmpUpdateExtendedNodeState}, // // NT 5.1 extensions that are not understood by NT5 and // earlier. NT5 nodes will ignore these updates without // error. // {4, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetNetworkMulticastConfiguration}, }; // // Local prototypes // DWORD NmpCreateRpcBindings( IN PNM_NODE Node ); DWORD NmpCreateClusterInstanceId( VOID ); // // Component initialization routines. // DWORD NmInitialize( VOID ) /*++ Routine Description: Initializes the Node Manager component. Arguments: None Return Value: A Win32 status code. Notes: The local node object is created by this routine. --*/ { DWORD status; OM_OBJECT_TYPE_INITIALIZE nodeTypeInitializer; HDMKEY nodeKey = NULL; DWORD nameSize = CS_MAX_NODE_NAME_LENGTH + 1; HKEY serviceKey; DWORD nodeIdSize = (CS_MAX_NODE_ID_LENGTH + 1) * sizeof(WCHAR); LPWSTR nodeIdString = NULL; WSADATA wsaData; WORD versionRequested; int err; ULONG ndx; DWORD valueType; NM_NODE_INFO2 nodeInfo; WCHAR errorString[12]; DWORD eventCode = 0; LPWSTR string; CL_ASSERT(NmpState == NmStateOffline); ClRtlLogPrint(LOG_NOISE,"[NM] Initializing...\n"); // // Initialize globals. 
// InitializeCriticalSection(&NmpLock); InitializeListHead(&NmpNodeList); InitializeListHead(&NmpNetworkList); InitializeListHead(&NmpInternalNetworkList); InitializeListHead(&NmpDeletedNetworkList); InitializeListHead(&NmpInterfaceList); InitializeListHead(&NmpDeletedInterfaceList); NmMaxNodes = ClusterDefaultMaxNodes; NmMaxNodeId = ClusterMinNodeId + NmMaxNodes - 1; // // Initializing the RPC Recording/cancelling mechanism // NOTE - This should move if NmMaxNodeId Definition above moves. // NmpIntraClusterRpcArr = LocalAlloc(LMEM_FIXED, sizeof(NM_INTRACLUSTER_RPC_THREAD) * (NmMaxNodeId +1)); if(NmpIntraClusterRpcArr == NULL) { ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to allocate memory for RPC monitoring.\n" ); status = ERROR_NOT_ENOUGH_MEMORY; eventCode = CS_EVENT_ALLOCATION_FAILURE; goto error_exit; } else { ZeroMemory(NmpIntraClusterRpcArr, sizeof(NM_INTRACLUSTER_RPC_THREAD) * (NmMaxNodeId + 1)); for(ndx = 0;ndx <= NmMaxNodeId;ndx++) InitializeListHead(&NmpIntraClusterRpcArr[ndx]); InitializeCriticalSection(&NmpRPCLock); } // // Initialize the network configuration package. 
// ClNetInitialize( ClNetPrint, ClNetLogEvent, ClNetLogEvent1, ClNetLogEvent2, ClNetLogEvent3 ); // // Initialize WinSock // versionRequested = MAKEWORD(2,0); err = WSAStartup(versionRequested, &wsaData); if (err != 0) { status = WSAGetLastError(); wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent1(LOG_CRITICAL, NM_EVENT_WSASTARTUP_FAILED, errorString); ClRtlLogPrint(LOG_NOISE,"[NM] Failed to initialize Winsock, status %1!u!\n", status); return(status); } if ( (LOBYTE(wsaData.wVersion) != 2) || (HIBYTE(wsaData.wVersion) != 0)) { status = WSAVERNOTSUPPORTED; wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent1(LOG_CRITICAL, NM_EVENT_WSASTARTUP_FAILED, errorString); ClRtlLogPrint(LOG_CRITICAL, "[NM] Found unexpected Windows Sockets version %1!u!\n", wsaData.wVersion ); WSACleanup(); return(status); } NmpShutdownEvent = CreateEvent(NULL, TRUE, FALSE, NULL); if (NmpShutdownEvent == NULL) { status = GetLastError(); wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent1(LOG_CRITICAL, CS_EVENT_ALLOCATION_FAILURE, errorString); ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to create shutdown event, status %1!u!\n", status ); WSACleanup(); return(status); } NmpState = NmStateOnlinePending; // // Get the name of this node. 
// if (!GetComputerName(&(NmLocalNodeName[0]), &nameSize)) { status = GetLastError(); eventCode = NM_EVENT_GETCOMPUTERNAME_FAILED; ClRtlLogPrint(LOG_CRITICAL, "[NM] Unable to get local computername, status %1!u!\n", status ); goto error_exit; } ClRtlLogPrint(LOG_NOISE, "[NM] Local node name = %1!ws!.\n", NmLocalNodeName ); // // Open a control channel to the Cluster Network driver // NmClusnetHandle = ClusnetOpenControlChannel(0); if (NmClusnetHandle == NULL) { status = GetLastError(); eventCode = NM_EVENT_CLUSNET_UNAVAILABLE; ClRtlLogPrint(LOG_CRITICAL, "[NM] Unable to open a handle to the Cluster Network driver, status %1!u!\n", status ); goto error_exit; } // // Tell the Cluster Network driver to shutdown when our handle is closed // in case the Cluster Service crashes. // status = ClusnetEnableShutdownOnClose(NmClusnetHandle); if (status != ERROR_SUCCESS) { eventCode = NM_EVENT_CLUSNET_ENABLE_SHUTDOWN_FAILED; ClRtlLogPrint(LOG_CRITICAL, "[NM] Unable to register Cluster Network shutdown trigger, status %1!u!\n", status ); goto error_exit; } // // Allocate the node ID array. // CL_ASSERT(NmpIdArray == NULL); NmpIdArray = LocalAlloc( LMEM_FIXED, (sizeof(PNM_NODE) * (NmMaxNodeId + 1)) ); if (NmpIdArray == NULL) { status = ERROR_NOT_ENOUGH_MEMORY; eventCode = CS_EVENT_ALLOCATION_FAILURE; goto error_exit; } ZeroMemory(NmpIdArray, (sizeof(PNM_NODE) * (NmMaxNodeId + 1))); // // Create the node object type // ZeroMemory(&nodeTypeInitializer, sizeof(OM_OBJECT_TYPE_INITIALIZE)); nodeTypeInitializer.ObjectSize = sizeof(NM_NODE); nodeTypeInitializer.Signature = NM_NODE_SIG; nodeTypeInitializer.Name = L"Node"; nodeTypeInitializer.DeleteObjectMethod = NmpDestroyNodeObject; status = OmCreateType(ObjectTypeNode, &nodeTypeInitializer); if (status != ERROR_SUCCESS) { eventCode = CS_EVENT_ALLOCATION_FAILURE; ClRtlLogPrint(LOG_CRITICAL, "[NM] Unable to create node object type, status %1!u!\n", status ); goto error_exit; } // // Get the local node ID from the local registry. 
// status = RegCreateKeyW( HKEY_LOCAL_MACHINE, CLUSREG_KEYNAME_CLUSSVC_PARAMETERS, &serviceKey ); if (status != ERROR_SUCCESS) { wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent2( LOG_CRITICAL, CS_EVENT_REG_OPEN_FAILED, CLUSREG_KEYNAME_CLUSSVC_PARAMETERS, errorString ); ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to open cluster service parameters key, status %1!u!\n", status ); goto error_exit; } string = L"NodeId"; status = RegQueryValueExW( serviceKey, string, 0, &valueType, (LPBYTE) &(NmLocalNodeIdString[0]), &nodeIdSize ); RegCloseKey(serviceKey); if (status != ERROR_SUCCESS) { wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent2( LOG_CRITICAL, CS_EVENT_REG_QUERY_FAILED, string, errorString ); ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to read local node ID from registry, status %1!u!\n", status ); goto error_exit; } if (valueType != REG_SZ) { status = ERROR_INVALID_PARAMETER; wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent2( LOG_CRITICAL, CS_EVENT_REG_QUERY_FAILED, string, errorString ); ClRtlLogPrint(LOG_CRITICAL, "[NM] Local Node ID registry value is not of type REG_SZ.\n" ); goto error_exit; } ClRtlLogPrint(LOG_NOISE, "[NM] Local node ID = %1!ws!.\n", NmLocalNodeIdString ); NmLocalNodeId = wcstoul(NmLocalNodeIdString, NULL, 10); // // Get information about the local node. // wcscpy(&(nodeInfo.NodeId[0]), NmLocalNodeIdString); status = NmpGetNodeDefinition(&nodeInfo); if (status != ERROR_SUCCESS) { goto error_exit; } // // Create the local node object. We must do this here because GUM // requires the local node object to initialize. 
// status = NmpCreateLocalNodeObject(&nodeInfo); ClNetFreeNodeInfo(&nodeInfo); if (status != ERROR_SUCCESS) { goto error_exit; } // // Initialize the network and interface object types // status = NmpInitializeNetworks(); if (status != ERROR_SUCCESS) { goto error_exit; } status = NmpInitializeInterfaces(); if (status != ERROR_SUCCESS) { goto error_exit; } // // Initialize net PnP handling // status = NmpInitializePnp(); if (status != ERROR_SUCCESS) { goto error_exit; } // // init the advise sink that tells when a connection object has been // renamed // status = NmpInitializeConnectoidAdviseSink(); if (status != ERROR_SUCCESS) { goto error_exit; } ClRtlLogPrint(LOG_NOISE,"[NM] Initialization complete.\n"); return(ERROR_SUCCESS); error_exit: if (eventCode != 0) { wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent1(LOG_CRITICAL, eventCode, errorString); } wsprintfW( &(errorString[0]), L"%u", status ); CsLogEvent1(LOG_CRITICAL, NM_INIT_FAILED, errorString); ClRtlLogPrint(LOG_CRITICAL,"[NM] Initialization failed %1!d!\n",status); NmShutdown(); return(status); } // NmInitialize VOID NmShutdown( VOID ) /*++ Routine Description: Terminates all processing - shuts down all sources of work for worker threads. Arguments: Return Value: --*/ { DWORD status; if (NmpState == NmStateOffline) { return; } NmCloseConnectoidAdviseSink(); NmpShutdownPnp(); NmpAcquireLock(); ClRtlLogPrint(LOG_NOISE,"[NM] Shutdown starting...\n"); NmpState = NmStateOfflinePending; if (NmpActiveThreadCount > 0) { ClRtlLogPrint(LOG_NOISE, "[NM] Waiting for %1!u! active threads to terminate...\n", NmpActiveThreadCount ); NmpReleaseLock(); status = WaitForSingleObject(NmpShutdownEvent, INFINITE); CL_ASSERT(status == WAIT_OBJECT_0); ClRtlLogPrint(LOG_NOISE, "[NM] All active threads have completed. 
Continuing shutdown...\n" ); } else { NmpReleaseLock(); } NmLeaveCluster(); NmpCleanupPnp(); if (NmLocalNode != NULL) { NmpDeleteNodeObject(NmLocalNode, FALSE); NmLocalNode = NULL; } if (NmpIdArray != NULL) { LocalFree(NmpIdArray); NmpIdArray = NULL; } NmpFreeClusterKey(); if (NmpClusterInstanceId != NULL) { MIDL_user_free(NmpClusterInstanceId); NmpClusterInstanceId = NULL; } if (NmClusnetHandle != NULL) { ClusnetCloseControlChannel(NmClusnetHandle); NmClusnetHandle = NULL; } CloseHandle(NmpShutdownEvent); NmpShutdownEvent = NULL; WSACleanup(); // // As long as the GUM and Clusapi RPC interfaces cannot be // shutdown, it is not safe to delete this critical section. // // DeleteCriticalSection(&NmpLock); NmpState = NmStateOffline; ClRtlLogPrint(LOG_NOISE,"[NM] Shutdown complete.\n"); return; } // NmShutdown VOID NmLeaveCluster( VOID ) /*++ Routine Description: Arguments: Return Value: --*/ { DWORD status; if (NmLocalNode != NULL) { if ( (NmLocalNode->State == ClusterNodeUp) || (NmLocalNode->State == ClusterNodePaused) || (NmLocalNode->State == ClusterNodeJoining) ) { // // Leave the cluster. // ClRtlLogPrint(LOG_NOISE,"[NM] Leaving cluster.\n"); MMLeave(); #ifdef MM_IN_CLUSNET status = ClusnetLeaveCluster(NmClusnetHandle); CL_ASSERT(status == ERROR_SUCCESS); #endif // MM_IN_CLUSNET } } NmpMembershipShutdown(); NmpCleanupInterfaces(); NmpCleanupNetworks(); NmpCleanupNodes(); // // Shutdown the Cluster Network driver. // if (NmClusnetHandle != NULL) { DWORD status = ClusnetShutdown(NmClusnetHandle); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NM] Shutdown of the Cluster Network driver failed, status %1!u!\n", status ); } } if (NmpClusnetEndpoint != NULL) { MIDL_user_free(NmpClusnetEndpoint); NmpClusnetEndpoint = NULL; } return; } // NmLeaveCluster DWORD NmpCreateClusterObjects( IN RPC_BINDING_HANDLE JoinSponsorBinding ) /*++ Routine Description: Creates objects to represent the cluster's nodes, networks, and interfaces. 
Arguments: JoinSponsorBinding - A pointer to an RPC binding handle for the sponsor node if this node is joining a cluster. NULL if this node is forming a cluster. Return Value: ERROR_SUCCESS if the routine is successful. A Win32 error code otherwise. Notes: This routine MUST NOT be called with the NM lock held. --*/ { DWORD status; PNM_NODE_ENUM2 nodeEnum = NULL; PNM_NETWORK_ENUM networkEnum = NULL; PNM_INTERFACE_ENUM2 interfaceEnum = NULL; PNM_NODE node = NULL; DWORD matchedNetworkCount = 0; DWORD newNetworkCount = 0; DWORD InitRetry = 2; WCHAR errorString[12]; DWORD eventCode = 0; BOOL renameConnectoids; while ( InitRetry-- ) { // // Initialize the Cluster Network driver. This will clean up // any old state that was left around from the last run of the // Cluster Service. Note that the local node object is registered in // this call. // status = ClusnetInitialize( NmClusnetHandle, NmLocalNodeId, NmMaxNodes, NULL, NULL, NULL, NULL, NULL, NULL ); if (status == ERROR_SUCCESS) { break; } else { ClRtlLogPrint(LOG_UNUSUAL, "[NM] Shutting down Cluster Network driver before retrying Initialization, status %1!u!\n", status); ClusnetShutdown( NmClusnetHandle ); } }; if ( status != ERROR_SUCCESS ) { eventCode = NM_EVENT_CLUSNET_INITIALIZE_FAILED; ClRtlLogPrint(LOG_CRITICAL, "[NM] Initialization of the Cluster Network driver failed, status %1!u!\n", status ); goto error_exit; } // // Tell the Cluster Network driver to reserve the Cluster Network // endpoint on this node. // status = ClusnetReserveEndpoint( NmClusnetHandle, NmpClusnetEndpoint ); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[NM] Unable to reserve Clusnet Network endpoint %1!ws!, " "status %2!u!\n", NmpClusnetEndpoint, status ); wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent2( LOG_CRITICAL, NM_EVENT_CLUSNET_RESERVE_ENDPOINT_FAILED, NmpClusnetEndpoint, errorString ); goto error_exit; } // // Obtain the node portion of the cluster database. 
// ClRtlLogPrint(LOG_NOISE, "[NM] Synchronizing node information.\n" ); if (JoinSponsorBinding == NULL) { status = NmpEnumNodeDefinitions(&nodeEnum); } else { status = NmRpcEnumNodeDefinitions2( JoinSponsorBinding, NmpJoinSequence, NmLocalNodeIdString, &nodeEnum ); } if (status != ERROR_SUCCESS) { eventCode = NM_EVENT_CONFIG_SYNCH_FAILED; ClRtlLogPrint(LOG_CRITICAL, "[NM] Unable to synchronize node information, status %1!u!.\n", status ); goto error_exit; } // // Create the node objects. // ClRtlLogPrint(LOG_NOISE, "[NM] Creating node objects.\n" ); status = NmpCreateNodeObjects(nodeEnum); if (status != ERROR_SUCCESS) { goto error_exit; } // // Obtain the networks portion of the cluster database. // ClRtlLogPrint(LOG_NOISE, "[NM] Synchronizing network information.\n" ); if (JoinSponsorBinding == NULL) { status = NmpEnumNetworkDefinitions(&networkEnum); } else { status = NmRpcEnumNetworkDefinitions( JoinSponsorBinding, NmpJoinSequence, NmLocalNodeIdString, &networkEnum ); } if (status != ERROR_SUCCESS) { eventCode = NM_EVENT_CONFIG_SYNCH_FAILED; ClRtlLogPrint(LOG_CRITICAL, "[NM] Unable to synchronize network information, status %1!u!.\n", status ); goto error_exit; } // // Obtain the interfaces portion of the cluster database. // ClRtlLogPrint(LOG_NOISE, "[NM] Synchronizing interface information.\n" ); if (JoinSponsorBinding == NULL) { status = NmpEnumInterfaceDefinitions(&interfaceEnum); } else { status = NmRpcEnumInterfaceDefinitions2( JoinSponsorBinding, NmpJoinSequence, NmLocalNodeIdString, &interfaceEnum ); } if (status != ERROR_SUCCESS) { eventCode = NM_EVENT_CONFIG_SYNCH_FAILED; ClRtlLogPrint(LOG_CRITICAL, "[NM] Unable to synchronize interface information, status %1!u!.\n", status ); goto error_exit; } if ( CsUpgrade ) { // // If this is an upgrade from NT4 to Whistler, then fix up the // connectoid names so they align with the cluster network // names. // // REMOVE THIS PORTION AFTER WHISTLER HAS SHIPPED. 
// if ( CLUSTER_GET_MAJOR_VERSION( NmLocalNode->HighestVersion ) <= NT4SP4_MAJOR_VERSION ) { renameConnectoids = TRUE; } else { // // upgrade from W2K to Whistler. Nothing should have changed but // if it did, connectoids should have precedence // renameConnectoids = FALSE; } } else { // // THIS SECTION MUST ALWAYS BE HERE // // if forming, cluster network objects are renamed to its // corresponding connectoid name. During a join, the opposite is true. // if ( JoinSponsorBinding ) { renameConnectoids = TRUE; } else { renameConnectoids = FALSE; } } // // Post a PnP notification ioctl. If we receive a PnP notification // before we finish initializing, we must restart the process. // NmpWatchForPnpEvents(); if (status != ERROR_SUCCESS) { goto error_exit; } // // Run the network configuration engine. This will update the // cluster database. // status = NmpConfigureNetworks( JoinSponsorBinding, NmLocalNodeIdString, NmLocalNodeName, &networkEnum, &interfaceEnum, NmpClusnetEndpoint, &matchedNetworkCount, &newNetworkCount, renameConnectoids ); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to configure networks & interfaces, status %1!u!.\n", status ); goto error_exit; } ClRtlLogPrint(LOG_NOISE, "[NM] Matched %1!u! networks, created %2!u! new networks.\n", matchedNetworkCount, newNetworkCount ); // // Get the updated network information from the database. // ClRtlLogPrint(LOG_NOISE, "[NM] Resynchronizing network information.\n" ); if (JoinSponsorBinding == NULL) { status = NmpEnumNetworkDefinitions(&networkEnum); } else { status = NmRpcEnumNetworkDefinitions( JoinSponsorBinding, NmpJoinSequence, NmLocalNodeIdString, &networkEnum ); } if (status != ERROR_SUCCESS) { eventCode = NM_EVENT_CONFIG_SYNCH_FAILED; ClRtlLogPrint(LOG_CRITICAL, "[NM] Unable to resynchronize network information, " "status %1!u!.\n", status ); goto error_exit; } // // Get the updated interface information from the database. 
// ClRtlLogPrint(LOG_NOISE, "[NM] Resynchronizing interface information.\n" ); if (JoinSponsorBinding == NULL) { status = NmpEnumInterfaceDefinitions(&interfaceEnum); } else { status = NmRpcEnumInterfaceDefinitions2( JoinSponsorBinding, NmpJoinSequence, NmLocalNodeIdString, &interfaceEnum ); } if (status != ERROR_SUCCESS) { eventCode = NM_EVENT_CONFIG_SYNCH_FAILED; ClRtlLogPrint(LOG_CRITICAL, "[NM] Unable to resynchronize interface information, status %1!u!.\n", status ); goto error_exit; } // // Create the network objects. // ClRtlLogPrint(LOG_NOISE, "[NM] Creating network objects.\n" ); status = NmpCreateNetworkObjects(networkEnum); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to create network objects, status %1!u!.\n", status ); goto error_exit; } // // Fixup the priorities of the internal networks if we are forming // a cluster. // if (JoinSponsorBinding == NULL) { DWORD networkCount; PNM_NETWORK * networkList; status = NmpEnumInternalNetworks(&networkCount, &networkList); if ((status == ERROR_SUCCESS) && (networkCount > 0)) { DWORD i; HLOCALXSACTION xaction; // // Begin a transaction - this must not be done while holding // the NM lock. // xaction = DmBeginLocalUpdate(); if (xaction == NULL) { status = GetLastError(); ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to start a transaction, status %1!u!\n", status ); goto error_exit; } status = NmpSetNetworkPriorityOrder( networkCount, networkList, xaction ); if (status == ERROR_SUCCESS) { DmCommitLocalUpdate(xaction); } else { DmAbortLocalUpdate(xaction); goto error_exit; } for (i=0; iState = ClusterNodePaused; } else { NmLocalNode->State = ClusterNodeUp; } NmLocalNode->ExtendedState = ClusterNodeJoining; NmpState = NmStateOnline; NmpReleaseLock(); // // If the cluster instance ID does not exist, create it now. The cluster // instance ID should be in the database unless this is the first uplevel // node. // NmpCreateClusterInstanceId(); // // Create the cluster key. 
// status = NmpRegenerateClusterKey(); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NM] Failed to generate cluster key, status %1!u!. " "Allowing service to continue ...\n", status ); status = ERROR_SUCCESS; } // // Enable communication for the local node. // status = ClusnetOnlineNodeComm(NmClusnetHandle, NmLocalNodeId); if (status != ERROR_SUCCESS) { wsprintfW(&(errorString[0]), L"%u", NmLocalNodeId); wsprintfW(&(errorString2[0]), L"%u", status); CsLogEvent2( LOG_CRITICAL, NM_EVENT_CLUSNET_ONLINE_COMM_FAILED, errorString, errorString2 ); ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to enable communication for local node, status %1!u!\n", status ); goto error_exit; } GumReceiveUpdates(FALSE, GumUpdateMembership, NmpGumUpdateHandler, NULL, sizeof(NmGumDispatchTable)/sizeof(GUM_DISPATCH_ENTRY), NmGumDispatchTable, NULL ); // // Enable network PnP event handling. // // If a PnP event occured during the form process, an error code will // be returned, which will abort startup of the service. // status = NmpEnablePnpEvents(); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NM] A network PnP event occurred during form - abort.\n"); goto error_exit; } // // Check if we formed without any viable networks. The form is still // allowed, but we record an entry in the system event log. // if (!NmpCheckForNetwork()) { ClRtlLogPrint(LOG_UNUSUAL, "[NM] Formed cluster with no viable networks.\n" ); CsLogEvent(LOG_UNUSUAL, NM_EVENT_FORM_WITH_NO_NETWORKS); } // // Force a reconfiguration of multicast parameters and plumb // the results in clusnet. // NmpAcquireLock(); if (NmpIsClusterMulticastReady(TRUE)) { status = NmpStartMulticast(NULL); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NM] Failed to start multicast " "on cluster networks, status %1!u!.\n", status ); // // Not a de facto fatal error. 
// status = ERROR_SUCCESS; } } NmpReleaseLock(); error_exit: if (eventCode != 0) { wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent1(LOG_CRITICAL, eventCode, errorString); } return(status); } // NmFormNewCluster // // // Client-side routines for joining a cluster. // // DWORD NmJoinCluster( IN RPC_BINDING_HANDLE SponsorBinding ) { DWORD status; DWORD sponsorNodeId; PNM_INTERFACE netInterface; PNM_NETWORK network; PNM_NODE node; PLIST_ENTRY nodeEntry, ifEntry; WCHAR errorString[12], errorString2[12]; DWORD eventCode = 0; DWORD versionFlags = 0; extern BOOLEAN bFormCluster; DWORD retry; BOOLEAN joinBegin3 = TRUE; LPWSTR clusterInstanceId = NULL; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Beginning cluster join process.\n" ); // GN: If a node tries to restart immediately after a clean shutdown, // NmRpcJoinBegin2 can fail with ERROR_CLUSTER_NODE_UP. Since the regroup // incident caused by this node might not be finished. // // If we are getting error CLUSTER_NODE_UP, we will keep retrying for // 12 seconds, hoping that regroup will finish. retry = 120 / 3; // We sleep for 3 seconds. Need to wait 2 minutes // for (;;) { // // Get the join sequence number so we can tell if the cluster // configuration changes during the join process. We overload the // use of the NmpJoinSequence variable since it isn't used in the // sponsor capacity until the node joins. // // // Try NmRpcJoinBegin3. If it fails with an RPC procnum out of // range error, the sponsor is a downlevel node. Revert to // NmRpcJoinBegin2. // if (joinBegin3) { // Only read the cluster instance ID from the registry on // the first try. 
if (clusterInstanceId == NULL) { DWORD clusterInstanceIdBufSize = 0; DWORD clusterInstanceIdSize = 0; status = NmpQueryString( DmClusterParametersKey, L"ClusterInstanceID", REG_SZ, &clusterInstanceId, &clusterInstanceIdBufSize, &clusterInstanceIdSize ); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Failed to read cluster instance ID from database, status %1!u!.\n", status ); // Try to join with the downlevel interface. It is // possible that this node was just upgraded and the // last time it was in the cluster there was no // cluster instance ID. joinBegin3 = FALSE; continue; } } status = NmRpcJoinBegin3( SponsorBinding, clusterInstanceId, NmLocalNodeIdString, NmLocalNodeName, CsMyHighestVersion, CsMyLowestVersion, 0, // joiner's major node version 0, // joiner's minor node version L"", // joiner's CsdVersion 0, // joiner's product suite &sponsorNodeId, &NmpJoinSequence, &NmpClusnetEndpoint ); if (status == RPC_S_PROCNUM_OUT_OF_RANGE) { // retry immediately with JoinBegin2 joinBegin3 = FALSE; continue; } } else { status = NmRpcJoinBegin2( SponsorBinding, NmLocalNodeIdString, NmLocalNodeName, CsMyHighestVersion, CsMyLowestVersion, &sponsorNodeId, &NmpJoinSequence, &NmpClusnetEndpoint ); } if ( ((status != ERROR_CLUSTER_NODE_UP && status != ERROR_CLUSTER_JOIN_IN_PROGRESS) ) || retry == 0 ) { break; } ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Unable to begin join, status %1!u!. Retrying ...\n", status ); CsServiceStatus.dwCheckPoint++; CsAnnounceServiceStatus(); Sleep(3000); --retry; } // Free the cluster instance ID string, if necessary. if (clusterInstanceId != NULL) { midl_user_free(clusterInstanceId); } // [GORN Jan/7/2000] // If we are here, then we have already successfully talked to the sponsor // via JoinVersion interface. // // We shouldn't try to form the cluster if NmRpcJoinBegin2 fails. 
// Otherwise we may steal the quorum on the move [452108] // // Past this point we will not try to form a cluster // bFormCluster = FALSE; if (status != ERROR_SUCCESS) { eventCode = NM_EVENT_BEGIN_JOIN_FAILED; ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Unable to begin join, status %1!u!.\n", status ); goto error_exit; } ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Sponsor node ID = %1!u!. Join sequence number = %2!u!, endpoint = %3!ws!.\n", sponsorNodeId, NmpJoinSequence, NmpClusnetEndpoint ); // // Create all of the cluster objects for which we are responsible. // status = NmpCreateClusterObjects(SponsorBinding); if (status != ERROR_SUCCESS) { goto error_exit; } // The local node version might have changed, fix it // The sponsorer fixes it in the registry and tells other // nodes about it, however the joining node is not a part // of the cluster membership as yet. // The local node structure is created early on in NmInitialize() // hence it must get fixed up if ((NmLocalNode->HighestVersion != CsMyHighestVersion) || (NmLocalNode->LowestVersion != CsMyLowestVersion)) { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Local Node version changed probably due to upgrade/deinstall\n"); NmLocalNode->HighestVersion = CsMyHighestVersion; NmLocalNode->LowestVersion = CsMyLowestVersion; NmLocalNodeVersionChanged = TRUE; } //at this point we ready to calculate the cluster version //all the node objects contain the correct node versions NmpResetClusterVersion(FALSE); NmpMulticastInitialize(); // // Enable communication for the local node. 
// status = ClusnetOnlineNodeComm(NmClusnetHandle, NmLocalNodeId); if (status != ERROR_SUCCESS) { wsprintfW(&(errorString[0]), L"%u", NmLocalNodeId); wsprintfW(&(errorString2[0]), L"%u", status); CsLogEvent2( LOG_CRITICAL, NM_EVENT_CLUSNET_ONLINE_COMM_FAILED, errorString, errorString2 ); ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Unable to enable communication for local node, status %1!u!.\n", status ); goto error_exit; } // // Fire up the intracluster RPC server so we can perform the membership // join. // status = ClusterRegisterIntraclusterRpcInterface(); if ( status != ERROR_SUCCESS ) { eventCode = CS_EVENT_RPC_INIT_FAILED; ClRtlLogPrint(LOG_CRITICAL, "ClusSvc: Error starting intracluster RPC server, Status = %1!u!\n", status); goto error_exit; } // // Cycle through the list of cluster nodes and create mutual RPC bindings // for the intracluster interface with each. // for (nodeEntry = NmpNodeList.Flink; nodeEntry != &NmpNodeList; nodeEntry = nodeEntry->Flink ) { node = CONTAINING_RECORD(nodeEntry, NM_NODE, Linkage); if ( (node != NmLocalNode) && ( (node->State == ClusterNodeUp) || (node->State == ClusterNodePaused) ) ) { ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Creating RPC bindings for member node %1!u!\n", node->NodeId ); // // // Cycle through the target node's interfaces // for (ifEntry = node->InterfaceList.Flink; ifEntry != &(node->InterfaceList); ifEntry = ifEntry->Flink ) { netInterface = CONTAINING_RECORD( ifEntry, NM_INTERFACE, NodeLinkage ); network = netInterface->Network; if (NmpIsNetworkForInternalUse(network)) { if ( (network->LocalInterface != NULL) && NmpIsInterfaceRegistered(network->LocalInterface) && NmpIsInterfaceRegistered(netInterface) ) { PNM_INTERFACE localInterface = network->LocalInterface; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Attempting to use network %1!ws! 
to " "create bindings for node %2!u!\n", OmObjectName(network), node->NodeId ); status = NmpSetNodeInterfacePriority( node, 0xFFFFFFFF, netInterface, 1 ); if (status == ERROR_SUCCESS) { status = NmRpcCreateBinding( SponsorBinding, NmpJoinSequence, NmLocalNodeIdString, (LPWSTR) OmObjectId(localInterface), (LPWSTR) OmObjectId(node) ); if (status == ERROR_SUCCESS) { // // Create RPC bindings for the target node. // status = NmpCreateRpcBindings(node); if (status == ERROR_SUCCESS) { ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Created binding for node " "%1!u!\n", node->NodeId ); break; } wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent3( LOG_UNUSUAL, NM_EVENT_JOIN_BIND_OUT_FAILED, OmObjectName(node), OmObjectName(network), errorString ); ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Unable to create binding for " "node %1!u!, status %2!u!.\n", node->NodeId, status ); } else { wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent3( LOG_UNUSUAL, NM_EVENT_JOIN_BIND_IN_FAILED, OmObjectName(node), OmObjectName(network), errorString ); ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Member node %1!u! failed to " "create binding to us, status %2!u!\n", node->NodeId, status ); } } else { wsprintfW(&(errorString[0]), L"%u", node->NodeId); wsprintfW(&(errorString2[0]), L"%u", status); CsLogEvent2( LOG_UNUSUAL, NM_EVENT_CLUSNET_SET_INTERFACE_PRIO_FAILED, errorString, errorString2 ); ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Failed to set interface priorities " "for node %1!u!, status %2!u!\n", node->NodeId, status ); } } else { status = ERROR_CLUSTER_NODE_UNREACHABLE; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] No matching local interface for " "network %1!ws!\n", OmObjectName(netInterface->Network) ); } } else { status = ERROR_CLUSTER_NODE_UNREACHABLE; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Network %1!ws! is not used for internal " "communication.\n", OmObjectName(netInterface->Network) ); } } if (status != ERROR_SUCCESS) { // // Cannot make contact with this node. The join fails. 
// CsLogEvent1( LOG_CRITICAL, NM_EVENT_NODE_UNREACHABLE, OmObjectName(node) ); ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Cluster node %1!u! is not reachable. Join " "failed.\n", node->NodeId ); goto error_exit; } } } CL_ASSERT(status == ERROR_SUCCESS); // // run through the active nodes again, this time establishing // security contexts to use in signing packets // ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Establishing security contexts with all active nodes.\n" ); for (nodeEntry = NmpNodeList.Flink; nodeEntry != &NmpNodeList; nodeEntry = nodeEntry->Flink ) { node = CONTAINING_RECORD(nodeEntry, NM_NODE, Linkage); status = ClMsgCreateActiveNodeSecurityContext(NmpJoinSequence, node); if ( status != ERROR_SUCCESS ) { wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent2( LOG_UNUSUAL, NM_EVENT_CREATE_SECURITY_CONTEXT_FAILED, OmObjectName(node), errorString ); ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Unable to establish security context for node %1!u!, status 0x%2!08X!\n", node->NodeId, status ); goto error_exit; } } // // Finally, petition the sponsor for membership // ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Petitioning to join cluster membership.\n" ); #ifdef CLUSTER_TESTPOINT TESTPT(TpFailJoinPetitionForMembership) { status = 999999; goto error_exit; } #endif status = NmRpcPetitionForMembership( SponsorBinding, NmpJoinSequence, NmLocalNodeIdString ); if (status != ERROR_SUCCESS) { // // Our petition was denied. // eventCode = NM_EVENT_PETITION_FAILED; ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Petition to join was denied %1!d!\n", status ); goto error_exit; } #ifdef CLUSTER_TESTPOINT TESTPT(TpFailNmJoin) { status = 999999; goto error_exit; } #endif // // Reset the interface priorities for all nodes to default to // the priorities of the associated networks. 
// NmpAcquireLock(); for (ifEntry = NmpInterfaceList.Flink; ifEntry != &NmpInterfaceList; ifEntry = ifEntry->Flink ) { netInterface = CONTAINING_RECORD(ifEntry, NM_INTERFACE, Linkage); network = netInterface->Network; if ( NmpIsNetworkForInternalUse(network) && NmpIsInterfaceRegistered(netInterface) ) { status = ClusnetSetInterfacePriority( NmClusnetHandle, netInterface->Node->NodeId, netInterface->Network->ShortId, 0 ); CL_ASSERT(status == ERROR_SUCCESS); } } NmpState = NmStateOnline; NmpReleaseLock(); // // Invoke other components to create RPC bindings for each node. // // // Enable our GUM update handler. // GumReceiveUpdates( TRUE, GumUpdateMembership, NmpGumUpdateHandler, NULL, sizeof(NmGumDispatchTable)/sizeof(GUM_DISPATCH_ENTRY), NmGumDispatchTable, NULL ); return(ERROR_SUCCESS); error_exit: if (eventCode != 0) { wsprintfW(&(errorString[0]), L"%u", status); CsLogEvent1(LOG_CRITICAL, eventCode, errorString); } return(status); } // NmJoinCluster BOOLEAN NmpVerifyJoinerConnectivity( IN PNM_NODE JoiningNode, OUT PNM_NODE * UnreachableNode ) { PLIST_ENTRY entry; PNM_NODE node; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Verifying connectivity to active cluster nodes\n" ); *UnreachableNode = NULL; for (entry = NmpNodeList.Flink; entry != &NmpNodeList; entry = entry->Flink ) { node = CONTAINING_RECORD( entry, NM_NODE, Linkage ); if (NM_NODE_UP(node)) { if (!NmpVerifyNodeConnectivity(JoiningNode, node, NULL)) { *UnreachableNode = node; return(FALSE); } } } return(TRUE); } // NmpVerifyJoinerConnectivity DWORD NmGetJoinSequence( VOID ) { DWORD sequence; NmpAcquireLock(); sequence = NmpJoinSequence; NmpReleaseLock(); return(sequence); } // NmGetJoinSequence DWORD NmJoinComplete( OUT DWORD *EndSeq ) /*++ Routine Description: This routine is called by the initialization sequence once a join has successfully completed and the node can transition from ClusterNodeJoining to ClusterNodeOnline. Arguments: None Return Value: ERROR_SUCCESS if successful Win32 error otherwise. 
--*/ { DWORD Sequence; DWORD Status; PNM_JOIN_UPDATE JoinUpdate = NULL; DWORD UpdateLength; HDMKEY NodeKey = NULL; DWORD Default = 0; DWORD NumRetries=50; DWORD eventCode = 0; WCHAR errorString[12]; PNM_NETWORK_STATE_ENUM networkStateEnum = NULL; PNM_NETWORK_STATE_INFO networkStateInfo; PNM_INTERFACE_STATE_ENUM interfaceStateEnum = NULL; PNM_INTERFACE_STATE_INFO interfaceStateInfo; DWORD i; PNM_NETWORK network; PNM_INTERFACE netInterface; PLIST_ENTRY entry; DWORD moveCount; BOOLEAN mcast; UpdateLength = sizeof(NM_JOIN_UPDATE) + (lstrlenW(OmObjectId(NmLocalNode))+1)*sizeof(WCHAR); JoinUpdate = LocalAlloc(LMEM_FIXED, UpdateLength); if (JoinUpdate == NULL) { Status = ERROR_NOT_ENOUGH_MEMORY; eventCode = CS_EVENT_ALLOCATION_FAILURE; ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Unable to allocate memory.\n"); goto error_exit; } JoinUpdate->JoinSequence = NmpJoinSequence; lstrcpyW(JoinUpdate->NodeId, OmObjectId(NmLocalNode)); NodeKey = DmOpenKey(DmNodesKey, OmObjectId(NmLocalNode), KEY_READ); if (NodeKey == NULL) { Status = GetLastError(); wsprintfW(&(errorString[0]), L"%u", Status); CsLogEvent2( LOG_CRITICAL, CS_EVENT_REG_OPEN_FAILED, OmObjectId(NmLocalNode), errorString ); ClRtlLogPrint( LOG_CRITICAL, "[NMJOIN] Unable to open database key to local node, status %1!u!.\n", Status ); goto error_exit; } retry: Status = GumBeginJoinUpdate(GumUpdateMembership, &Sequence); if (Status != ERROR_SUCCESS) { eventCode = NM_EVENT_GENERAL_JOIN_ERROR; goto error_exit; } // // Get the leader node ID from the sponsor. // Status = NmRpcGetLeaderNodeId( CsJoinSponsorBinding, NmpJoinSequence, NmLocalNodeIdString, &NmpLeaderNodeId ); if (Status != ERROR_SUCCESS) { if (Status == ERROR_CALL_NOT_IMPLEMENTED) { // // The sponsor is an NT4 node. Make this node the leader. // NmpLeaderNodeId = NmLocalNodeId; } else { ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Failed to get leader node ID from sponsor, status %1!u!.\n", Status ); goto error_exit; } } ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Node %1!u! 
is the leader.\n", NmpLeaderNodeId ); // // Fetch the network and interface states from the sponsor // Status = NmRpcEnumNetworkAndInterfaceStates( CsJoinSponsorBinding, NmpJoinSequence, NmLocalNodeIdString, &networkStateEnum, &interfaceStateEnum ); if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Failed to get network and interface state values from sponsor, status %1!u!.\n", Status ); goto error_exit; } NmpAcquireLock(); for (i=0; iNetworkCount; i++) { networkStateInfo = &(networkStateEnum->NetworkList[i]); network = OmReferenceObjectById( ObjectTypeNetwork, networkStateInfo->Id ); if (network == NULL) { ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Cannot find network %1!ws! to update state.\n", networkStateInfo->Id ); NmpReleaseLock(); NmpFreeNetworkStateEnum(networkStateEnum); LocalFree(JoinUpdate); DmCloseKey(NodeKey); return(ERROR_CLUSTER_NETWORK_NOT_FOUND); } network->State = networkStateInfo->State; OmDereferenceObject(network); } for (i=0; iInterfaceCount; i++) { interfaceStateInfo = &(interfaceStateEnum->InterfaceList[i]); netInterface = OmReferenceObjectById( ObjectTypeNetInterface, interfaceStateInfo->Id ); if (netInterface == NULL) { ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Cannot find interface %1!ws! to update state.\n", interfaceStateInfo->Id ); NmpReleaseLock(); NmpFreeInterfaceStateEnum(interfaceStateEnum); LocalFree(JoinUpdate); DmCloseKey(NodeKey); return(ERROR_CLUSTER_NETINTERFACE_NOT_FOUND); } netInterface->State = interfaceStateInfo->State; OmDereferenceObject(netInterface); } NmpReleaseLock(); NmpFreeInterfaceStateEnum(interfaceStateEnum); interfaceStateEnum = NULL; // // Check the registry to see if we should come up paused. 
// JoinUpdate->IsPaused = Default; Status = DmQueryDword(NodeKey, CLUSREG_NAME_NODE_PAUSED, &JoinUpdate->IsPaused, &Default); if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Unable to query Paused value for local node, status %1!u!.\n", Status ); } Status = GumEndJoinUpdate(Sequence, GumUpdateMembership, NmUpdateJoinComplete, UpdateLength, JoinUpdate); if (Status != ERROR_SUCCESS) { if (Status == ERROR_CLUSTER_JOIN_ABORTED) { // // The join was aborted by the cluster members. Don't retry. // CsLogEvent(LOG_CRITICAL, NM_EVENT_JOIN_ABORTED); goto error_exit; } ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] GumEndJoinUpdate with sequence %1!d! failed %2!d!\n", Sequence, Status ); if (--NumRetries == 0) { CsLogEvent(LOG_CRITICAL, NM_EVENT_JOIN_ABANDONED); ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Tried to complete join too many times. Giving up.\n" ); goto error_exit; } goto retry; } NmpAcquireLock(); if (JoinUpdate->IsPaused != 0) { // // We should be coming up paused. // NmLocalNode->State = ClusterNodePaused; } else { // // Set our state to online. // NmLocalNode->State = ClusterNodeUp; } // // Remember whether this cluster meets multicast criteria. // mcast = NmpIsClusterMulticastReady(TRUE); NmpReleaseLock(); // // If the cluster instance ID does not exist, create it now. The cluster // instance ID should be in the database unless this is the first uplevel // node. // NmpCreateClusterInstanceId(); // // Create the cluster key. // Status = NmpRegenerateClusterKey(); if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NM] Failed to generate cluster key, status %1!u!. " "Allowing service to continue ...\n", Status ); Status = ERROR_SUCCESS; } // // Finally, enable network PnP event handling. // // If a PnP event occured during the join process, an error code will // be returned, which will abort startup of the service. 
// Status = NmpEnablePnpEvents(); if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] A network PnP event occurred during join - abort.\n"); goto error_exit; } // // Mark end sequence *EndSeq = Sequence; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Join complete, node now online\n"); if (mcast) { Status = NmpRefreshClusterMulticastConfiguration(); if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NM] Failed to refresh multicast configuration " "for cluster networks, status %1!u!.\n", Status ); // // Not a de facto fatal error. // Status = ERROR_SUCCESS; } } error_exit: if (JoinUpdate != NULL) { LocalFree(JoinUpdate); } if (NodeKey != NULL) { DmCloseKey(NodeKey); } if (eventCode != 0) { wsprintfW(&(errorString[0]), L"%u", Status); CsLogEvent1(LOG_CRITICAL, eventCode, errorString); } return(Status); } // NmJoinComplete // // Server-side routines for sponsoring a joining node. // /* Notes On Joining: Only a single node may join the cluster at any time. A join begins with a JoinBegin global update. A join completes successfully with a JoinComplete global update. A join is aborted with a JoinAbort global update. A timer runs on the sponsor during a join. The timer is suspended while the sponsor is performing work on behalf of the joiner. If the timer expires, a worker thread is scheduled to initiate the abort process. If the sponsor goes down while a join is in progress, the node down handling code on each remaining node will abort the join. */ error_status_t s_NmRpcJoinBegin( IN handle_t IDL_handle, IN LPWSTR JoinerNodeId, IN LPWSTR JoinerNodeName, OUT LPDWORD SponsorNodeId, OUT LPDWORD JoinSequenceNumber, OUT LPWSTR * ClusnetEndpoint ) /*++ Routine Description: Called by a joining node to begin the join process. Issues a JoinBegin global update. --*/ { DWORD status=ERROR_CLUSTER_INCOMPATIBLE_VERSIONS; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Request by node %1!ws! to begin joining, refused. 
Using obsolete join interface\n",
        JoinerNodeId
        );

    //
    // status is always ERROR_CLUSTER_INCOMPATIBLE_VERSIONS here, so the
    // refusal event is always logged.
    //
    if ( status != ERROR_SUCCESS ) {
        WCHAR errorCode[16];

        wsprintfW( errorCode, L"%u", status );
        CsLogEvent2(
            LOG_CRITICAL,
            NM_EVENT_JOIN_REFUSED,
            JoinerNodeId,
            errorCode
            );
    }

    return(status);

} // s_NmRpcJoinBegin


//
// Server-side routines for sponsoring a joining node.
//
/*

   Notes On Joining:

*/
//#pragma optimize("", off)

DWORD
NmpJoinBegin(
    IN  LPWSTR    JoinerNodeId,
    IN  LPWSTR    JoinerNodeName,
    IN  DWORD     JoinerHighestVersion,
    IN  DWORD     JoinerLowestVersion,
    OUT LPDWORD   SponsorNodeId,
    OUT LPDWORD   JoinSequenceNumber,
    OUT LPWSTR *  ClusnetEndpoint
    )
/*++

Routine Description:

    Called from s_NmRpcJoinBegin2 and s_NmRpcJoinBegin3. Contains
    functionality common to both JoinBegin versions.

    Validates the joiner's identity and version, issues the
    NmUpdateJoinBegin2 global update and, if this node is still the
    sponsor of that joiner afterwards, starts the join timer.

Arguments:

    JoinerNodeId         - ID string of the node that wants to join.

    JoinerNodeName       - Name of the joining node; forwarded in the
                           global update.

    JoinerHighestVersion - Versions reported by the joiner; checked
    JoinerLowestVersion    with NmpValidateNodeVersion and forwarded in
                           the global update.

    SponsorNodeId        - On success, receives NmLocalNodeId.

    JoinSequenceNumber   - On success, receives NmpJoinSequence.

    ClusnetEndpoint      - On success, receives a MIDL_user_allocate'd
                           copy of NmpClusnetEndpoint. Not written on
                           failure.

Return Value:

    ERROR_SUCCESS if the join was begun.
    A Win32 error code otherwise; NM_EVENT_JOIN_REFUSED is logged.

Notes:

    Called with NM lock held and NmpLockedEnterApi already called.

    The NM lock is released around GumSendUpdateEx and re-acquired
    before returning.

--*/
{
    DWORD       status = ERROR_SUCCESS;
    PNM_NODE    joinerNode = NULL;
    LPWSTR      endpoint = NULL;


    joinerNode = OmReferenceObjectById(
                     ObjectTypeNode,
                     JoinerNodeId
                     );

    if (joinerNode == NULL) {
        status = ERROR_CLUSTER_NODE_NOT_MEMBER;
        ClRtlLogPrint(LOG_UNUSUAL,
            "[NMJOIN] Node %1!ws! is not a member of this cluster. Cannot join.\n",
            JoinerNodeId
            );
        goto FnExit;
    }

    //
    // Copy the endpoint into RPC-owned memory so it can be returned
    // through the OUT parameter.
    // NOTE(review): NM_WCSLEN is presumably the byte length including
    // the terminating NUL - confirm against its definition.
    //
    endpoint = MIDL_user_allocate(NM_WCSLEN(NmpClusnetEndpoint));

    if (endpoint == NULL) {
        status = ERROR_NOT_ENOUGH_MEMORY;
        goto FnExit;
    }

    lstrcpyW(endpoint, NmpClusnetEndpoint);

    if (NmpJoinBeginInProgress) {
        status = ERROR_CLUSTER_JOIN_IN_PROGRESS;
        ClRtlLogPrint(LOG_UNUSUAL,
            "[NMJOIN] Node %1!ws! cannot join because a join is already in progress.\n",
            JoinerNodeId
            );
        goto FnExit;
    }

    //
    // Validate the node's version numbers, i.e. check what the cluster
    // database claims this node's version is against what the node
    // itself reports.
    //
    status = NmpValidateNodeVersion(
                 JoinerNodeId,
                 JoinerHighestVersion,
                 JoinerLowestVersion
                 );

    //
    // A revision mismatch means the node's version has changed since it
    // last joined (this may happen due to upgrades or a reinstall). If
    // the new version cannot join due to versioning, fail the join.
    //
    if (status == ERROR_REVISION_MISMATCH) {
        DWORD id = NmGetNodeId(joinerNode);

        status = NmpIsNodeVersionAllowed(
                     id,
                     JoinerHighestVersion,
                     JoinerLowestVersion,
                     TRUE
                     );

        if (status != ERROR_SUCCESS) {
            ClRtlLogPrint(LOG_NOISE,
                "[NMJOIN] The version of the cluster prevents Node %1!ws! from joining the cluster\n",
                JoinerNodeId
                );
            goto FnExit;
        }
    }
    else if (status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_NOISE,
            "[NMJOIN] The version of Node %1!ws! cannot be validated.\n",
            JoinerNodeId
            );
        goto FnExit;
    }

    //
    // Lock out other join attempts with this sponsor.
    //
    NmpJoinBeginInProgress = TRUE;
    NmpSuccessfulMMJoin = FALSE;

    //
    // The global update is issued without the NM lock held; the
    // NmpJoinBeginInProgress flag guards this window.
    //
    NmpReleaseLock();

    status = GumSendUpdateEx(
                 GumUpdateMembership,
                 NmUpdateJoinBegin2,
                 5,
                 NM_WCSLEN(JoinerNodeId),
                 JoinerNodeId,
                 NM_WCSLEN(JoinerNodeName),
                 JoinerNodeName,
                 NM_WCSLEN(NmLocalNodeIdString),
                 NmLocalNodeIdString,
                 sizeof(DWORD),
                 &JoinerHighestVersion,
                 sizeof(DWORD),
                 &JoinerLowestVersion
                 );

    NmpAcquireLock();

    CL_ASSERT(NmpJoinBeginInProgress == TRUE);
    NmpJoinBeginInProgress = FALSE;

    if (status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_UNUSUAL,
            "[NMJOIN] JoinBegin2 update for node %1!ws! failed, status %2!u!\n",
            JoinerNodeId,
            status
            );
        goto FnExit;
    }

    //
    // Verify that the join is still in progress with
    // this node as the sponsor.
    //
    if ( (NmpJoinerNodeId == joinerNode->NodeId) &&
         (NmpSponsorNodeId == NmLocalNodeId)
       )
    {
        //
        // Give the joiner parameters for future
        // join-related calls.
        //
        *SponsorNodeId = NmLocalNodeId;
        *JoinSequenceNumber = NmpJoinSequence;

        //
        // Start the join timer
        //
        NmpJoinTimer = NM_JOIN_TIMEOUT;

        ClRtlLogPrint(LOG_NOISE,
            "[NMJOIN] Node %1!ws! has begun the join process.\n",
            JoinerNodeId
            );
    }
    else {
        status = ERROR_CLUSTER_JOIN_ABORTED;
        ClRtlLogPrint(LOG_UNUSUAL,
            "[NMJOIN] Begin join of node %1!ws! was aborted\n",
            JoinerNodeId
            );
    }

FnExit:
    if (joinerNode) {
        OmDereferenceObject(joinerNode);
    }

    if (status == ERROR_SUCCESS) {
        // Ownership of the endpoint copy passes to the RPC runtime.
        *ClusnetEndpoint = endpoint;
    }
    else {
        WCHAR errorCode[16];

        if (endpoint) MIDL_user_free(endpoint);

        wsprintfW( errorCode, L"%u", status );
        CsLogEvent2(
            LOG_CRITICAL,
            NM_EVENT_JOIN_REFUSED,
            JoinerNodeId,
            errorCode
            );
    }

    return(status);

} // NmpJoinBegin


error_status_t
s_NmRpcJoinBegin2(
    IN  handle_t  IDL_handle,
    IN  LPWSTR    JoinerNodeId,
    IN  LPWSTR    JoinerNodeName,
    IN  DWORD     JoinerHighestVersion,
    IN  DWORD     JoinerLowestVersion,
    OUT LPDWORD   SponsorNodeId,
    OUT LPDWORD   JoinSequenceNumber,
    OUT LPWSTR *  ClusnetEndpoint
    )
/*++

Routine Description:

    Called by a joining node to begin the join process.
    Issues a JoinBegin global update.

    Refuses the request if FmDoesQuorumAllowJoin fails or if the NM is
    not in NmStateOnline; otherwise delegates to NmpJoinBegin with the
    NM lock held.

Return Value:

    ERROR_SUCCESS if the join was begun.
    A Win32 error code otherwise.

--*/
{
    DWORD       status = ERROR_SUCCESS;

    status = FmDoesQuorumAllowJoin();
    if (status != ERROR_SUCCESS)
    {
        ClRtlLogPrint(LOG_NOISE,
            "[NMJOIN] Quorum Characteristics prevent the node %1!ws! to from joining, Status=%2!u!.\n",
            JoinerNodeId,
            status
            );
        return(status);
    }

    NmpAcquireLock();

    ClRtlLogPrint(LOG_NOISE,
        "[NMJOIN] Processing request by node %1!ws! to begin joining (2).\n",
        JoinerNodeId
        );

    if (!NmpLockedEnterApi(NmStateOnline)) {
        status = ERROR_NODE_NOT_AVAILABLE;
        ClRtlLogPrint(LOG_UNUSUAL,
            "[NMJOIN] Cannot sponsor a joining node at this time.\n"
            );
        NmpReleaseLock();
        return(status);
    }

    status = NmpJoinBegin(
                 JoinerNodeId,
                 JoinerNodeName,
                 JoinerHighestVersion,
                 JoinerLowestVersion,
                 SponsorNodeId,
                 JoinSequenceNumber,
                 ClusnetEndpoint
                 );

    NmpLockedLeaveApi();

    NmpReleaseLock();

    return(status);

} // s_NmRpcJoinBegin2


error_status_t
s_NmRpcJoinBegin3(
    IN  handle_t  IDL_handle,
    IN  LPWSTR    JoinerClusterInstanceId,
    IN  LPWSTR    JoinerNodeId,
    IN  LPWSTR    JoinerNodeName,
    IN  DWORD     JoinerHighestVersion,
    IN  DWORD     JoinerLowestVersion,
    IN  DWORD     JoinerMajorVersion,
    IN  DWORD     JoinerMinorVersion,
    IN  LPWSTR    JoinerCsdVersion,
    IN  DWORD     JoinerProductSuite,
    OUT LPDWORD   SponsorNodeId,
    OUT LPDWORD   JoinSequenceNumber,
    OUT LPWSTR *  ClusnetEndpoint
    )
/*++

Routine Description:

    Version 3 of the begin-join call. In addition to the checks made by
    s_NmRpcJoinBegin2, the joiner's cluster instance ID must match
    NmpClusterInstanceId (case-insensitive compare) before the request
    is passed to NmpJoinBegin.

Return Value:

    ERROR_SUCCESS if the join was begun.
    ERROR_CLUSTER_INSTANCE_ID_MISMATCH if the instance IDs differ or
        this node has no instance ID.
    Another Win32 error code otherwise.

--*/
{
    DWORD       status = ERROR_SUCCESS;
    //
    // NOTE(review): the next three locals are not referenced anywhere
    // in this routine's body.
    //
    LPWSTR      clusterInstanceId = NULL;
    DWORD       clusterInstanceIdBufSize = 0;
    DWORD       clusterInstanceIdSize = 0;

    ClRtlLogPrint(LOG_NOISE,
        "[NMJOIN] Processing request by node %1!ws! to begin joining (3).\n",
        JoinerNodeId
        );

    status = FmDoesQuorumAllowJoin();
    if (status != ERROR_SUCCESS)
    {
        ClRtlLogPrint(LOG_NOISE,
            "[NMJOIN] Quorum Characteristics prevent node %1!ws! from joining, Status=%2!u!.\n",
            JoinerNodeId,
            status
            );
        return(status);
    }

    //
    // Check our cluster instance ID against the joiner's.
    //
    if (NmpClusterInstanceId == NULL ||
        lstrcmpiW(NmpClusterInstanceId, JoinerClusterInstanceId) != 0) {

        WCHAR errorCode[16];

        status = ERROR_CLUSTER_INSTANCE_ID_MISMATCH;

        ClRtlLogPrint(LOG_CRITICAL,
            "[NMJOIN] Sponsor cluster instance ID %1!ws! does not match joiner cluster instance id %2!ws!.\n",
            ((NmpClusterInstanceId == NULL) ? 
L"" : NmpClusterInstanceId), JoinerClusterInstanceId ); wsprintfW( errorCode, L"%u", status ); CsLogEvent2( LOG_CRITICAL, NM_EVENT_JOIN_REFUSED, JoinerNodeId, errorCode ); return(status); } else { ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Sponsor cluster instance ID matches joiner cluster instance id (%1!ws!).\n", JoinerClusterInstanceId ); } NmpAcquireLock(); if (!NmpLockedEnterApi(NmStateOnline)) { status = ERROR_NODE_NOT_AVAILABLE; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Cannot sponsor a joining node at this time.\n" ); } else { status = NmpJoinBegin( JoinerNodeId, JoinerNodeName, JoinerHighestVersion, JoinerLowestVersion, SponsorNodeId, JoinSequenceNumber, ClusnetEndpoint ); NmpLockedLeaveApi(); } NmpReleaseLock(); return(status); } // s_NmRpcJoinBegin3 DWORD NmpUpdateJoinBegin( IN BOOL SourceNode, IN LPWSTR JoinerNodeId, IN LPWSTR JoinerNodeName, IN LPWSTR SponsorNodeId ) { DWORD status=ERROR_CLUSTER_INCOMPATIBLE_VERSIONS; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Failing update to begin join of node %1!ws! with " "sponsor %2!ws!. Using obsolete join interface.\n", JoinerNodeId, SponsorNodeId ); return(status); } // NmpUpdateJoinBegin DWORD NmpUpdateJoinBegin2( IN BOOL SourceNode, IN LPWSTR JoinerNodeId, IN LPWSTR JoinerNodeName, IN LPWSTR SponsorNodeId, IN LPDWORD JoinerHighestVersion, IN LPDWORD JoinerLowestVersion ) { DWORD status = ERROR_SUCCESS; PNM_NODE sponsorNode=NULL; PNM_NODE joinerNode=NULL; HLOCALXSACTION hXsaction=NULL; BOOLEAN lockAcquired = FALSE; BOOLEAN fakeSuccess = FALSE; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Received update to begin join (2) of node %1!ws! with " "sponsor %2!ws!.\n", JoinerNodeId, SponsorNodeId ); // // If running with -noquorum flag or if not online, don't sponsor // any node. 
// if (CsNoQuorum || !NmpEnterApi(NmStateOnline)) { ClRtlLogPrint(LOG_NOISE, "[NM] Not in valid state to begin a join operation.\n" ); return(ERROR_NODE_NOT_AVAILABLE); } // // Find the sponsor node // sponsorNode = OmReferenceObjectById( ObjectTypeNode, SponsorNodeId ); if (sponsorNode == NULL) { status = ERROR_CLUSTER_NODE_NOT_MEMBER; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] JoinBegin update for node %1!ws! failed because " "sponsor node %2!ws! is not a member of this cluster.\n", JoinerNodeId, SponsorNodeId ); goto FnExit; } // // Find the joiner node // joinerNode = OmReferenceObjectById( ObjectTypeNode, JoinerNodeId ); if (joinerNode == NULL) { status = ERROR_CLUSTER_NODE_NOT_MEMBER; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Node %1!ws! is not a member of this cluster. " "Cannot join.\n", JoinerNodeId ); goto FnExit; } hXsaction = DmBeginLocalUpdate(); if (hXsaction == NULL) { status = GetLastError(); ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to start a transaction, status %1!u!\n", status ); goto FnExit; } NmpAcquireLock(); lockAcquired = TRUE; if (!NM_NODE_UP(sponsorNode)) { // // [GorN 4/3/2000] See bug#98287 // This hack is a kludgy solution to a problem that // a replay of this Gum update after the sponsor death // will fail on all the nodes that didn't see the update. // fakeSuccess = TRUE; status = ERROR_NODE_NOT_AVAILABLE; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Sponsor node %1!ws! is not up. Join of node %2!ws! " "failed.\n", SponsorNodeId, JoinerNodeId ); goto FnExit; } // // Check that the joiner is really who we think it is. // if (lstrcmpiW( OmObjectName(joinerNode), JoinerNodeName)) { status = ERROR_CLUSTER_NODE_NOT_MEMBER; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Node %1!ws! is not a member of this cluster. " "Cannot join.\n", JoinerNodeName ); goto FnExit; } // // Make sure the joiner is currently down. // if (joinerNode->State != ClusterNodeDown) { status = ERROR_CLUSTER_NODE_UP; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Node %1!ws! is not down. 
Cannot begin join.\n", JoinerNodeId ); goto FnExit; } // // Make sure we aren't already in a join. // if (NmpJoinerNodeId != ClusterInvalidNodeId) { status = ERROR_CLUSTER_JOIN_IN_PROGRESS; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Node %1!ws! cannot begin join because a join is " "already in progress for node %2!u!.\n", JoinerNodeId, NmpJoinerNodeId ); goto FnExit; } // // Perform the version compatibility check. // status = NmpIsNodeVersionAllowed( NmGetNodeId(joinerNode), *JoinerHighestVersion, *JoinerLowestVersion, TRUE ); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] The version of the cluster prevents Node %1!ws! " "from joining the cluster\n", JoinerNodeId ); goto FnExit; } // Fix up the joiner's version number if needed. // status = NmpValidateNodeVersion( JoinerNodeId, *JoinerHighestVersion, *JoinerLowestVersion ); if (status == ERROR_REVISION_MISMATCH) { // // At this point, the registry contains the new // versions for the joining code. // The new node information should be reread // from the registry before resetting the cluster // version // make sure the joiner gets the database from the // sponsor after the fixups have occured // status = NmpJoinFixupNodeVersion( hXsaction, JoinerNodeId, *JoinerHighestVersion, *JoinerLowestVersion ); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Node %1!ws! failed to fixup its node version\r\n", JoinerNodeId); goto FnExit; } } else if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] The verison of Node %1!ws! could not be validated\n", JoinerNodeId); goto FnExit; } // //at this point we ready to calculate the cluster version //all the node versions are in the registry, the fixups have //been made if neccessary // NmpResetClusterVersion(TRUE); // // Enable communication to the joiner. // // This must be the last test that can fail before the join is allowed // to proceed. 
// status = ClusnetOnlineNodeComm(NmClusnetHandle, joinerNode->NodeId); if (status != ERROR_SUCCESS) { if (status != ERROR_CLUSTER_NODE_ALREADY_UP) { ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Failed to enable communication for node %1!u!, " "status %2!u!\n", JoinerNodeId, status ); goto FnExit; } else { status = ERROR_SUCCESS; } } // // Officially begin the join process // CL_ASSERT(NmpJoinTimer == 0); CL_ASSERT(NmpJoinAbortPending == FALSE); CL_ASSERT(NmpJoinerUp == FALSE); CL_ASSERT(NmpSponsorNodeId == ClusterInvalidNodeId); NmpJoinerNodeId = joinerNode->NodeId; NmpSponsorNodeId = sponsorNode->NodeId; NmpJoinerOutOfSynch = FALSE; NmpJoinSequence = GumGetCurrentSequence(GumUpdateMembership); joinerNode->State = ClusterNodeJoining; ClusterEvent( CLUSTER_EVENT_NODE_JOIN, joinerNode ); NmpCleanupIfJoinAborted = TRUE; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Node %1!ws! join sequence = %2!u!\n", JoinerNodeId, NmpJoinSequence ); CL_ASSERT(status == ERROR_SUCCESS); FnExit: if (lockAcquired) { NmpLockedLeaveApi(); NmpReleaseLock(); } else { NmpLeaveApi(); } if (hXsaction != NULL) { if (status == ERROR_SUCCESS) { DmCommitLocalUpdate(hXsaction); } else { DmAbortLocalUpdate(hXsaction); } } if (joinerNode != NULL) { OmDereferenceObject(joinerNode); } if (sponsorNode != NULL) { OmDereferenceObject(sponsorNode); } if (fakeSuccess) { status = ERROR_SUCCESS; } return(status); } // NmpUpdateJoinBegin2 DWORD NmpCreateRpcBindings( IN PNM_NODE Node ) { DWORD status; // // Create the default binding for the whole cluster service // status = ClMsgCreateDefaultRpcBinding( Node, &Node->DefaultRpcBindingGeneration); if (status != ERROR_SUCCESS) { return(status); } // // Create private bindings for the NM's use. // We create one for reporting network connectivity and one for // performing network failure isolation. The NM uses the // default binding for operations on behalf of joining nodes. // if (Node->ReportRpcBinding != NULL) { // // Reuse the old binding. 
// status = ClMsgVerifyRpcBinding(Node->ReportRpcBinding); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to verify RPC binding for node %1!u!, " "status %2!u!.\n", Node->NodeId, status ); return(status); } } else { // // Create a new binding // status = ClMsgCreateRpcBinding( Node, &(Node->ReportRpcBinding), 0 ); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to create RPC binding for node %1!u!, " "status %2!u!.\n", Node->NodeId, status ); return(status); } } if (Node->IsolateRpcBinding != NULL) { // // Reuse the old binding. // status = ClMsgVerifyRpcBinding(Node->IsolateRpcBinding); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to verify RPC binding for node %1!u!, " "status %2!u!.\n", Node->NodeId, status ); return(status); } } else { // // Create a new binding // status = ClMsgCreateRpcBinding( Node, &(Node->IsolateRpcBinding), 0 ); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to create RPC binding for node %1!u!, " "status %2!u!.\n", Node->NodeId, status ); return(status); } } // // Call other components to create their private bindings // status = GumCreateRpcBindings(Node); if (status != ERROR_SUCCESS) { return(status); } status = EvCreateRpcBindings(Node); if (status != ERROR_SUCCESS) { return(status); } status = FmCreateRpcBindings(Node); if (status != ERROR_SUCCESS) { return(status); } return(ERROR_SUCCESS); } // NmpCreateRpcBindings error_status_t s_NmRpcCreateBinding( IN handle_t IDL_handle, IN DWORD JoinSequence, IN LPWSTR JoinerNodeId, IN LPWSTR JoinerInterfaceId, IN LPWSTR MemberNodeId ) { DWORD status; NmpAcquireLock(); ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Processing CreateBinding request from joining node %1!ws! 
for member node %2!ws!\n", JoinerNodeId, MemberNodeId ); if (NmpLockedEnterApi(NmStateOnlinePending)) { PNM_NODE joinerNode = OmReferenceObjectById( ObjectTypeNode, JoinerNodeId ); if (joinerNode != NULL) { if ( (JoinSequence == NmpJoinSequence) && (NmpJoinerNodeId == joinerNode->NodeId) && (NmpSponsorNodeId == NmLocalNodeId) && !NmpJoinAbortPending ) { PNM_NODE memberNode; CL_ASSERT(joinerNode->State == ClusterNodeJoining); CL_ASSERT(NmpJoinerUp == FALSE); CL_ASSERT(NmpJoinTimer != 0); // // Suspend the join timer while we are working on // behalf of the joiner. This precludes an abort // from occuring as well. // NmpJoinTimer = 0; memberNode = OmReferenceObjectById( ObjectTypeNode, MemberNodeId ); if (memberNode != NULL) { PNM_INTERFACE netInterface = OmReferenceObjectById( ObjectTypeNetInterface, JoinerInterfaceId ); if (netInterface != NULL) { if (memberNode == NmLocalNode) { status = NmpCreateJoinerRpcBindings( joinerNode, netInterface ); } else { if (NM_NODE_UP(memberNode)) { DWORD joinSequence = NmpJoinSequence; RPC_BINDING_HANDLE binding = Session[memberNode->NodeId]; CL_ASSERT(binding != NULL); NmpReleaseLock(); NmStartRpc(memberNode->NodeId); status = NmRpcCreateJoinerBinding( binding, joinSequence, JoinerNodeId, JoinerInterfaceId ); NmEndRpc(memberNode->NodeId); if(status != RPC_S_OK) { NmDumpRpcExtErrorInfo(status); } NmpAcquireLock(); } else { status = ERROR_CLUSTER_NODE_DOWN; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] CreateBinding call for joining node %1!ws! failed because member node %2!ws! is down.\n", JoinerNodeId, MemberNodeId ); } } OmDereferenceObject(netInterface); } else { status = ERROR_CLUSTER_NETINTERFACE_NOT_FOUND; ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Can't create binding for joining node %1!ws! - interface %2!ws! doesn't exist.\n", JoinerNodeId, JoinerInterfaceId ); } OmDereferenceObject(memberNode); } else { status = ERROR_CLUSTER_NODE_NOT_FOUND; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] CreateBinding call for joining node %1!ws! 
failed because member node %2!ws! does not exist\n", JoinerNodeId, MemberNodeId ); } // // Verify that the join is still in progress // if ( (JoinSequence == NmpJoinSequence) && (NmpJoinerNodeId == joinerNode->NodeId) ) { CL_ASSERT(joinerNode->State == ClusterNodeJoining); CL_ASSERT(NmpJoinerUp == FALSE); CL_ASSERT(NmpSponsorNodeId == NmLocalNodeId); CL_ASSERT(NmpJoinTimer == 0); CL_ASSERT(NmpJoinAbortPending == FALSE); // // Restart the join timer. // NmpJoinTimer = NM_JOIN_TIMEOUT; } else { status = ERROR_CLUSTER_JOIN_ABORTED; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] CreateBinding call for joining node %1!ws! failed because the join was aborted.\n", JoinerNodeId ); } } else { status = ERROR_CLUSTER_JOIN_ABORTED; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] CreateBinding call for joining node %1!ws! failed because the join was aborted.\n", JoinerNodeId ); } OmDereferenceObject(joinerNode); } else { status = ERROR_CLUSTER_NODE_NOT_MEMBER; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] CreateBinding call for joining node %1!ws! failed because the node is not a member of the cluster.\n", JoinerNodeId ); } NmpLockedLeaveApi(); } NmpReleaseLock(); return(status); } // s_NmRpcCreateBinding error_status_t s_NmRpcCreateJoinerBinding( IN handle_t IDL_handle, IN DWORD JoinSequence, IN LPWSTR JoinerNodeId, IN LPWSTR JoinerInterfaceId ) /*++ Notes: The sponsor is responsible for aborting the join on failure. --*/ { DWORD status; NmpAcquireLock(); ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Processing CreateBinding request for joining node %1!ws!.\n", JoinerNodeId ); if (NmpLockedEnterApi(NmStateOnline)) { PNM_NODE joinerNode = OmReferenceObjectById( ObjectTypeNode, JoinerNodeId ); if (joinerNode != NULL) { PNM_INTERFACE netInterface = OmReferenceObjectById( ObjectTypeNetInterface, JoinerInterfaceId ); if (netInterface != NULL) { // // Verify that a join is still in progress. 
//
                if ( (JoinSequence == NmpJoinSequence) &&
                     (NmpJoinerNodeId == joinerNode->NodeId)
                   )
                {
                    status = NmpCreateJoinerRpcBindings(
                                 joinerNode,
                                 netInterface
                                 );

                    if (status != ERROR_SUCCESS) {
                        WCHAR errorString[12];

                        wsprintfW(&(errorString[0]), L"%u", status);
                        CsLogEvent3(
                            LOG_UNUSUAL,
                            NM_EVENT_JOINER_BIND_FAILED,
                            OmObjectName(joinerNode),
                            OmObjectName(netInterface->Network),
                            errorString
                            );
                    }
                }
                else {
                    status = ERROR_CLUSTER_JOIN_ABORTED;
                    ClRtlLogPrint(LOG_UNUSUAL,
                        "[NMJOIN] Failing create bindings for joining node %1!ws! because the join was aborted\n",
                        JoinerNodeId
                        );
                }

                OmDereferenceObject(netInterface);
            }
            else {
                status = ERROR_CLUSTER_NETINTERFACE_NOT_FOUND;
                ClRtlLogPrint(LOG_CRITICAL,
                    "[NMJOIN] Can't create binding for joining node %1!ws! - no corresponding interface for joiner interface %2!ws!.\n",
                    JoinerNodeId,
                    JoinerInterfaceId
                    );
            }

            OmDereferenceObject(joinerNode);
        }
        else {
            status = ERROR_CLUSTER_NODE_NOT_MEMBER;
            ClRtlLogPrint(LOG_UNUSUAL,
                "[NMJOIN] CreateBinding call for joining node %1!ws! failed because the node is not a member of the cluster.\n",
                JoinerNodeId
                );
        }

        NmpLockedLeaveApi();
    }
    else {
        status = ERROR_NODE_NOT_AVAILABLE;
        ClRtlLogPrint(LOG_NOISE,
            "[NMJOIN] Not in valid state to process the request.\n"
            );
    }

    NmpReleaseLock();

    return(status);

} // s_NmRpcCreateJoinerBinding


DWORD
NmpCreateJoinerRpcBindings(
    IN PNM_NODE       JoinerNode,
    IN PNM_INTERFACE  JoinerInterface
    )
/*++

Routine Description:

    Creates this node's RPC bindings to a joining node over the network
    that JoinerInterface is attached to.

Arguments:

    JoinerNode      - The joining node. Must be the node recorded in
                      NmpJoinerNodeId and be in state ClusterNodeJoining
                      (asserted).

    JoinerInterface - The joiner's interface to use for the bindings.

Return Value:

    ERROR_SUCCESS if the bindings were created.
    ERROR_CLUSTER_NODE_UNREACHABLE if the network is not for internal
        use, or either the joiner's or the local interface is not
        registered.
    ERROR_CLUSTER_NETINTERFACE_NOT_FOUND if the local node has no
        interface on the network.
    Otherwise the status returned by NmpSetNodeInterfacePriority or
        NmpCreateRpcBindings.

Notes:

    Called with the NmpLock held.

    The lock is released around the call to NmpCreateRpcBindings and
    re-acquired before returning. The joiner node and the local
    interface are referenced across that window.

--*/
{
    DWORD          status;
    PNM_NETWORK    network = JoinerInterface->Network;
    // Cached so the ID can be logged without touching JoinerNode again.
    CL_NODE_ID     joinerNodeId = JoinerNode->NodeId;


    CL_ASSERT(JoinerNode->NodeId == NmpJoinerNodeId);
    CL_ASSERT(JoinerNode->State == ClusterNodeJoining);

    ClRtlLogPrint(LOG_NOISE,
        "[NMJOIN] Creating bindings for joining node %1!u! using network %2!ws!\n",
        joinerNodeId,
        OmObjectName(JoinerInterface->Network)
        );

    //
    // Make sure that this node has an interface on the target network.
    //
    if (NmpIsNetworkForInternalUse(network)) {
        if (network->LocalInterface != NULL) {
            if ( NmpIsInterfaceRegistered(JoinerInterface) &&
                 NmpIsInterfaceRegistered(network->LocalInterface)
               )
            {
                status = NmpSetNodeInterfacePriority(
                             JoinerNode,
                             0xFFFFFFFF,
                             JoinerInterface,
                             1
                             );

                if (status == ERROR_SUCCESS) {
                    PNM_INTERFACE localInterface = network->LocalInterface;

                    //
                    // Create intracluster RPC bindings for the petitioner.
                    // The MM relies on these to perform the join.
                    //

                    //
                    // Hold references on both objects while the lock
                    // is dropped.
                    //
                    OmReferenceObject(localInterface);
                    OmReferenceObject(JoinerNode);

                    NmpReleaseLock();

                    status = NmpCreateRpcBindings(JoinerNode);

                    NmpAcquireLock();

                    if (status != ERROR_SUCCESS) {
                        ClRtlLogPrint(LOG_UNUSUAL,
                            "[NMJOIN] Unable to create RPC binding for "
                            "joining node %1!u!, status %2!u!.\n",
                            joinerNodeId,
                            status
                            );
                    }

                    OmDereferenceObject(JoinerNode);
                    OmDereferenceObject(localInterface);
                }
                else {
                    ClRtlLogPrint(LOG_CRITICAL,
                        "[NMJOIN] Failed to set interface priority for "
                        "network %1!ws! (%2!ws!), status %3!u!\n",
                        OmObjectId(network),
                        OmObjectName(network),
                        status
                        );
                }
            }
            else {
                status = ERROR_CLUSTER_NODE_UNREACHABLE;
            }
        }
        else {
            status = ERROR_CLUSTER_NETINTERFACE_NOT_FOUND;
        }
    }
    else {
        status = ERROR_CLUSTER_NODE_UNREACHABLE;
    }

    if (status !=ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[NMJOIN] Failed to create binding for joining node %1!u! "
            "on network %2!ws! (%3!ws!), status %4!u!\n",
            joinerNodeId,
            OmObjectId(network),
            OmObjectName(network),
            status
            );
    }

    return(status);

} // NmpCreateJoinerRpcBindings


error_status_t
s_NmRpcPetitionForMembership(
    IN handle_t    IDL_handle,
    IN DWORD       JoinSequence,
    IN LPCWSTR     JoinerNodeId
    )
/*++

Routine Description:

    Server side of a join petition.

Arguments:

    IDL_handle - RPC binding handle, not used.

    JoinSequence - Supplies the sequence returned from NmRpcJoinBegin

    JoinerNodeId - Supplies the ID of the node attempting to join.

Return Value:

    ERROR_SUCCESS if successful
    Win32 error otherwise.

--*/
{
    DWORD     status;
    PNM_NODE  joinerNode;


#ifdef CLUSTER_TESTPOINT
    TESTPT(TestpointJoinFailPetition) {
        return(999999);
    }
#endif

    NmpAcquireLock();

    ClRtlLogPrint(LOG_UNUSUAL,
        "[NMJOIN] Processing petition to join from node %1!ws!.\n",
        JoinerNodeId
        );

    if (NmpLockedEnterApi(NmStateOnline)) {

        joinerNode = OmReferenceObjectById(ObjectTypeNode, JoinerNodeId);

        if (joinerNode != NULL) {
            //
            // Verify that the join is still in progress
            //
            //
            // DavidDio 6/13/2000
            // There is a small window where a begin join update can
            // succeed during a regroup, but the regroup ends before
            // the joining node petitions to join. In this case, the
            // node will be marked out of sync. Aborting the join
            // after MMJoin() is much more heavyweight than before,
            // so check for this condition now. (Bug 125778).
            //
            if ( (JoinSequence == NmpJoinSequence) &&
                 (NmpJoinerNodeId == joinerNode->NodeId) &&
                 (NmpSponsorNodeId == NmLocalNodeId) &&
                 (!NmpJoinAbortPending) &&
                 (!NmpJoinerOutOfSynch)
               )
            {
                ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Performing join.\n");

                CL_ASSERT(joinerNode->State == ClusterNodeJoining);
                CL_ASSERT(NmpJoinerUp == FALSE);
                CL_ASSERT(NmpJoinTimer != 0);

                //
                // Call the MM to join this node to the cluster membership.
                // Disable the join timer. Once the node becomes an active
                // member, we won't need it anymore. 
// NmpJoinTimer = 0; NmpReleaseLock(); status = MMJoin( joinerNode->NodeId, NM_CLOCK_PERIOD, NM_SEND_HB_RATE, NM_RECV_HB_RATE, NM_MM_JOIN_TIMEOUT ); NmpAcquireLock(); // // Verify that the join is still in progress // if ( (JoinSequence == NmpJoinSequence) && (NmpJoinerNodeId == joinerNode->NodeId) ) { CL_ASSERT(NmpSponsorNodeId == NmLocalNodeId); CL_ASSERT(joinerNode->State == ClusterNodeJoining); CL_ASSERT(NmpJoinTimer == 0); CL_ASSERT(NmpJoinAbortPending == FALSE); // GorN 3/22/2000 // We hit a case when MMJoin has succeeded after a regroup // that killed one of the nodes (not joiner and not sponsor) // thus leaving the joiner out of sync // We need to abourt the join in this case too if (status != MM_OK || NmpJoinerOutOfSynch) { status = MMMapStatusToDosError(status); if (NmpJoinerOutOfSynch) { status = ERROR_CLUSTER_JOIN_ABORTED; } // // Abort the join // ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Petition to join by node %1!ws! failed, status %2!u!.\n", JoinerNodeId, status ); // // If MMJoin was unsuccessful it initiates a banishing // regroup. This regroup will deliver node down events // on all nodes that saw hb's from the joiner. // // Calling MMBlockIfRegroupIsInProgress here will guarantee that // Phase2 cleanup is complete on all nodes, before we // call NmpJoinAbort. // NmpReleaseLock(); MMBlockIfRegroupIsInProgress(); NmpAcquireLock(); NmpJoinAbort(status, joinerNode); } else { NmpSuccessfulMMJoin = TRUE; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Petition to join by node %1!ws! succeeded.\n", JoinerNodeId ); } #ifdef MM_IN_CLUSNET if (status == MM_OK) { status = NmJoinNodeToCluster(joinerNodeId); if (status != ERROR_SUCCESS) { DWORD clusnetStatus; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Join of node %1!ws! 
failed, status %2!u!.\n", JoinerNodeId, status ); CL_LOGFAILURE( status ); NmpReleaseLock(); MMEject(joinerNodeId); NmpAcquireLock(); clusnetStatus = ClusnetOfflineNodeComm( NmClusnetHandle, joinerNodeId ); CL_ASSERT( (status == ERROR_SUCCESS) || (status == ERROR_CLUSTER_NODE_ALREADY_DOWN ); } else { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Join completed successfully.\n" ); } } #endif // MM_IN_CLUSNET } else { status = ERROR_CLUSTER_JOIN_ABORTED; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Petition to join by node %1!ws! failed because the join was aborted.\n", JoinerNodeId ); } } else { status = ERROR_CLUSTER_JOIN_ABORTED; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Petition by node %1!ws! failed because the join was aborted\n", JoinerNodeId ); } OmDereferenceObject(joinerNode); } else { status = ERROR_CLUSTER_NODE_NOT_MEMBER; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Petition to join by %1!ws! failed because the node is not a cluster member\n", JoinerNodeId ); } NmpLockedLeaveApi(); } else { status = ERROR_NODE_NOT_AVAILABLE; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Not in valid state to process the request.\n" ); } NmpReleaseLock(); return(status); } // s_NmRpcPetitionForMembership error_status_t s_NmRpcGetLeaderNodeId( IN handle_t IDL_handle, IN DWORD JoinSequence, OPTIONAL IN LPWSTR JoinerNodeId, OPTIONAL OUT LPDWORD LeaderNodeId ) { DWORD status = ERROR_SUCCESS; PNM_NODE joinerNode = NULL; NmpAcquireLock(); if (NmpLockedEnterApi(NmStateOnline)){ joinerNode = OmReferenceObjectById( ObjectTypeNode, JoinerNodeId ); if (joinerNode != NULL) { if ( (JoinSequence == NmpJoinSequence) && (NmpJoinerNodeId == joinerNode->NodeId) && (NmpSponsorNodeId == NmLocalNodeId) && !NmpJoinAbortPending ) { CL_ASSERT(joinerNode->State == ClusterNodeJoining); *LeaderNodeId = NmpLeaderNodeId; } else { status = ERROR_CLUSTER_JOIN_ABORTED; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] GetLeaderNodeId call for joining node %1!ws! 
failed because the join was aborted.\n", JoinerNodeId ); } OmDereferenceObject(joinerNode); } else { status = ERROR_CLUSTER_NODE_NOT_MEMBER; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] GetLeaderNodeId call for joining node %1!ws! failed because the node is not a member of the cluster.\n", JoinerNodeId ); } NmpLockedLeaveApi(); } else { status = ERROR_NODE_NOT_AVAILABLE; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Not in valid state to process GetLeaderNodeId request.\n" ); } NmpReleaseLock(); return(status); } // s_NmRpcGetLeaderNodeId DWORD NmpUpdateJoinComplete( IN PNM_JOIN_UPDATE JoinUpdate ) { DWORD status; NmpAcquireLock(); ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Processing JoinComplete update from node %1!ws!\n", JoinUpdate->NodeId ); if (NmpLockedEnterApi(NmStateOnline)) { PNM_NODE joinerNode; LPWSTR joinerIdString = JoinUpdate->NodeId; joinerNode = OmReferenceObjectById(ObjectTypeNode, joinerIdString); if (joinerNode != NULL) { CL_ASSERT(joinerNode != NmLocalNode); // // Verify that the join is still in progress and nothing has // changed. // if ( (JoinUpdate->JoinSequence == NmpJoinSequence) && (NmpJoinerNodeId == joinerNode->NodeId) && (joinerNode->State == ClusterNodeJoining) && NmpJoinerUp && !NmpJoinerOutOfSynch ) { PNM_INTERFACE netInterface; PNM_NETWORK network; PLIST_ENTRY ifEntry; NmpJoinerNodeId = ClusterInvalidNodeId; NmpSponsorNodeId = ClusterInvalidNodeId; NmpJoinTimer = 0; NmpJoinAbortPending = FALSE; NmpJoinSequence = 0; NmpJoinerUp = FALSE; if (JoinUpdate->IsPaused != 0) { // // This node is coming up in the paused state. // joinerNode->State = ClusterNodePaused; } else { joinerNode->State = ClusterNodeUp; } joinerNode->ExtendedState = ClusterNodeJoining; ClusterEvent(CLUSTER_EVENT_NODE_UP, (PVOID)joinerNode); // // Reset the interface priorities for this node. 
// for (ifEntry = joinerNode->InterfaceList.Flink; ifEntry != &joinerNode->InterfaceList; ifEntry = ifEntry->Flink ) { netInterface = CONTAINING_RECORD( ifEntry, NM_INTERFACE, NodeLinkage ); network = netInterface->Network; if ( NmpIsNetworkForInternalUse(network) && NmpIsInterfaceRegistered(netInterface) ) { status = ClusnetSetInterfacePriority( NmClusnetHandle, joinerNode->NodeId, network->ShortId, 0 ); CL_ASSERT(status == ERROR_SUCCESS); } } status = ERROR_SUCCESS; } else { status = ERROR_CLUSTER_JOIN_ABORTED; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Join of node %1!ws! cannot complete because the join was aborted\n", joinerIdString ); } OmDereferenceObject(joinerNode); } else { status =ERROR_CLUSTER_NODE_NOT_MEMBER; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Join of node %1!ws! cannot complete because the node is not a cluster member.\n", joinerIdString ); } NmpLockedLeaveApi(); } else { status = ERROR_NODE_NOT_AVAILABLE; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Not in valid state to process JoinComplete update.\n" ); } // // If the multicast shared key is based on the cluster service account // password, we may need to refresh, since the password might have // changed and the joiner will be running under the new password. // if (status == ERROR_SUCCESS) { status = NmpMulticastRegenerateKey(NULL); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[NM] Failed to regenerate cluster network multicast " "keys, status %1!u!.\n", status ); // // Not a de facto fatal error. // status = ERROR_SUCCESS; } } NmpReleaseLock(); // DavidDio 10/27/2000 // Bug 213781: NmpUpdateJoinComplete must always return ERROR_SUCCESS. // Otherwise, there is a small window whereby GUM sequence numbers on // remaining cluster nodes can fall out of sync. If the join should // be aborted, return ERROR_SUCCESS but poison the joiner out-of-band. 
if (status != ERROR_SUCCESS) { DWORD dwJoinerId; if (JoinUpdate->NodeId != NULL) { dwJoinerId = wcstoul(JoinUpdate->NodeId, NULL, 10); ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Join of node %1!u! failed with status %2!u!. Initiating banishment.\n", dwJoinerId, status ); NmAdviseNodeFailure(dwJoinerId, status); } else { dwJoinerId = ClusterInvalidNodeId; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Join of node %1!ws! failed with status %2!u!. Cannot initiate banishment as node id is unknown.\n", dwJoinerId, status ); } } return(ERROR_SUCCESS); } // NmpUpdateJoinComplete DWORD NmpUpdateJoinAbort( IN BOOL SourceNode, IN LPDWORD JoinSequence, IN LPWSTR JoinerNodeId ) /*++ Notes: --*/ { DWORD status = ERROR_SUCCESS; NmpAcquireLock(); ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Received update to abort join sequence %1!u! (joiner id %2!ws!).\n", *JoinSequence, JoinerNodeId ); if (NmpLockedEnterApi(NmStateOnline)) { PNM_NODE joinerNode = OmReferenceObjectById( ObjectTypeNode, JoinerNodeId ); if (joinerNode != NULL) { // // Check if the specified join is still in progress. // if ( (*JoinSequence == NmpJoinSequence) && (NmpJoinerNodeId == joinerNode->NodeId) ) { CL_ASSERT(NmpSponsorNodeId != ClusterInvalidNodeId); CL_ASSERT(joinerNode->State == ClusterNodeJoining); // // Assumption: // // An abort cannot occur during the MM join process. // If the joiner is not already up, it cannot come up // during the abort processing. // // Assert condition may not be true with the current MM join code. // Some nodes might have got monitor node and set // NmpJoinerUp state to TRUE by the time the sponsor issued // an abort update // //CL_ASSERT(NmpJoinerUp == FALSE); if (NmpCleanupIfJoinAborted) { NmpCleanupIfJoinAborted = FALSE; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Issuing a node down event for %1!u!.\n", joinerNode->NodeId ); // // This node is not yet active in the membership. // Call the node down event handler to finish the abort. 
// // // We will not call NmpMsgCleanup1 and NmpMsgCleanup2, // because we cannot guarantee that they will get executed // in a barrier style fashion // // !!! Lock will be acquired by NmpNodeDownEventHandler // second time. Is it OK? // NmpNodeDownEventHandler(joinerNode); } else { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Node down was already issued for %1!u!.\n", joinerNode->NodeId ); } } else { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Ignoring old join abort update with sequence %1!u!.\n", *JoinSequence ); } OmDereferenceObject(joinerNode); status = ERROR_SUCCESS; } else { status = ERROR_CLUSTER_NODE_NOT_MEMBER; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Join of node %1!ws! cannot be aborted because the node is not a cluster member.\n", JoinerNodeId ); } NmpLockedLeaveApi(); } else { status = ERROR_NODE_NOT_AVAILABLE; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Not in valid state to process JoinAbort update.\n" ); } NmpReleaseLock(); return(status); } // NmpUpdateJoinAbort VOID NmpJoinAbort( DWORD AbortStatus, PNM_NODE JoinerNode ) /*++ Routine Description: Issues a JoinAbort update. Notes: Called with the NmpLock held. --*/ { DWORD status; DWORD joinSequence = NmpJoinSequence; WCHAR errorString[12]; CL_ASSERT(NmpJoinerNodeId != ClusterInvalidNodeId); CL_ASSERT(NmpSponsorNodeId == NmLocalNodeId); CL_ASSERT(JoinerNode->State == ClusterNodeJoining); if (AbortStatus == ERROR_TIMEOUT) { wsprintfW(&(errorString[0]), L"%u", AbortStatus); CsLogEvent1( LOG_CRITICAL, NM_EVENT_JOIN_TIMED_OUT, OmObjectName(JoinerNode) ); } else { wsprintfW(&(errorString[0]), L"%u", AbortStatus); CsLogEvent2( LOG_CRITICAL, NM_EVENT_SPONSOR_JOIN_ABORTED, OmObjectName(JoinerNode), errorString ); } // // Assumption: // // An abort cannot occur during the MM join process. If the joiner // is not already up, it cannot come up during the abort processing. // if (NmpSuccessfulMMJoin == FALSE) { // // The joining node has not become active yet. Issue // an abort update. 
// DWORD joinSequence = NmpJoinSequence; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Issuing update to abort join of node %1!u!.\n", NmpJoinerNodeId ); NmpReleaseLock(); status = GumSendUpdateEx( GumUpdateMembership, NmUpdateJoinAbort, 2, sizeof(DWORD), &joinSequence, NM_WCSLEN(OmObjectId(JoinerNode)), OmObjectId(JoinerNode) ); NmpAcquireLock(); } else { // // The joining node is already active in the membership. // Ask the MM to kick it out. The node down event will // finish the abort process. // CL_NODE_ID joinerNodeId = NmpJoinerNodeId; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Ejecting joining node %1!u! from the cluster membership.\n", NmpJoinerNodeId ); NmpReleaseLock(); status = MMEject(joinerNodeId); NmpAcquireLock(); } if (status != MM_OK) { status = MMMapStatusToDosError(status); ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Update to abort join of node %1!u! failed, status %2!u!\n", JoinerNode->NodeId, status ); // // If the join is still pending, and this is the sponsor node, // force a timeout to retry the abort. If we aren't the sponsor, // there isn't much we can do. // if ( (joinSequence == NmpJoinSequence) && (NmpJoinerNodeId == JoinerNode->NodeId) && (NmpSponsorNodeId == NmLocalNodeId) ) { NmpJoinTimer = 1; NmpJoinAbortPending = FALSE; } } return; } // NmpJoinAbort VOID NmpJoinAbortWorker( IN PCLRTL_WORK_ITEM WorkItem, IN DWORD Status, IN DWORD BytesTransferred, IN ULONG_PTR IoContext ) /*++ Routine Description: Worker thread for aborting a join. --*/ { DWORD joinSequence = PtrToUlong(WorkItem->Context); NmpAcquireLock(); // // The active thread count was bumped up when this item was scheduled. // No need to call NmpEnterApi(). // // // If the join is still pending, begin the abort process. 
// if ( (joinSequence == NmpJoinSequence) && (NmpJoinerNodeId != ClusterInvalidNodeId) && NmpJoinAbortPending ) { PNM_NODE joinerNode = NmpIdArray[NmpJoinerNodeId]; if (joinerNode != NULL) { ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Worker thread initiating abort of joining node %1!u!\n", NmpJoinerNodeId ); NmpJoinAbort(ERROR_TIMEOUT, joinerNode); } else { CL_ASSERT(joinerNode != NULL); } } else { ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Skipping join abort, sequence to abort %1!u!, current join sequence %2!u!, " "joiner node: %3!u! sponsor node: %4!u!\n", joinSequence, NmpJoinSequence, NmpJoinerNodeId, NmpSponsorNodeId ); } NmpLockedLeaveApi(); NmpReleaseLock(); LocalFree(WorkItem); return; } // NmpJoinAbortWorker VOID NmpJoinTimerTick( IN DWORD MsTickInterval ) /*++ Notes: Called with NmpLock held. --*/ { if (NmpLockedEnterApi(NmStateOnline)) { // // If we are sponsoring a join, update the timer. // if ( (NmpJoinerNodeId != ClusterInvalidNodeId) && (NmpSponsorNodeId == NmLocalNodeId) && !NmpJoinAbortPending && (NmpJoinTimer != 0) ) { //ClRtlLogPrint(LOG_NOISE, // "[NMJOIN] Timer tick (%1!u! ms)\n", // Interval // ); if (NmpJoinTimer > MsTickInterval) { NmpJoinTimer -= MsTickInterval; } else { // // The join has timed out. Schedule a worker thread to // carry out the abort process. // PCLRTL_WORK_ITEM workItem; ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Join of node %1!u! has timed out.\n", NmpJoinerNodeId ); workItem = LocalAlloc(LMEM_FIXED, sizeof(CLRTL_WORK_ITEM)); if (workItem != NULL) { DWORD status; ClRtlInitializeWorkItem( workItem, NmpJoinAbortWorker, ULongToPtr(NmpJoinSequence) ); status = ClRtlPostItemWorkQueue( CsDelayedWorkQueue, workItem, 0, 0 ); if (status == ERROR_SUCCESS) { // // Stop the timer, flag that an abort is in progress, // and account for the thread we just scheduled. 
// NmpJoinTimer = 0; NmpJoinAbortPending = TRUE; NmpActiveThreadCount++; } else { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Failed to schedule abort of join, status %1!u!.\n", status ); LocalFree(workItem); } } else { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Failed to allocate memory for join abort.\n" ); } } } NmpLockedLeaveApi(); } return; } // NmpJoinTimerTick VOID NmTimerTick( IN DWORD MsTickInterval ) /*++ Routine Description: Implements all of the NM timers. Called on every tick of the common NM/MM timer - currently every 300ms. Arguments: MsTickInterval - The number of milliseconds that have passed since the last tick. ReturnValue: None. --*/ { NmpAcquireLock(); NmpNetworkTimerTick(MsTickInterval); NmpJoinTimerTick(MsTickInterval); #if DBG // Addition for checking for hung RPC threads. NmpRpcTimerTick(MsTickInterval); #endif // DBG NmpReleaseLock(); return; } // NmTimerTick error_status_t s_JoinAddNode3( IN handle_t IDL_handle, IN LPCWSTR lpszNodeName, IN DWORD dwNodeHighestVersion, IN DWORD dwNodeLowestVersion, IN DWORD dwNodeProductSuite ) /*++ Routine Description: Adds a new node to the cluster. Arguments: IDL_handle - RPC binding handle, not used. lpszNodeName - Supplies the name of the new node. dwNodeHighestVersion - The highest cluster version number that the new node can support. dwNodeLowestVersion - The lowest cluster version number that the new node can support. dwNodeProductSuite - The product suite type identifier for the new node. Return Value: ERROR_SUCCESS if successful Win32 error code otherwise. Notes: This is a new routine in NT5. It performs the AddNode operation correctly. It will never be invoked by an NT4 system. It cannot be invoked if an NT4 node is in the cluster without violating the license agreement. The cluster registry APIs cannot be called while holding the NmpLock, or a deadlock may occur. --*/ { DWORD status; DWORD registryNodeLimit; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Received request to add node '%1!ws!' 
to the cluster.\n", lpszNodeName ); // // Read the necessary registry parameters before acquiring // the NM lock. // status = DmQueryDword( DmClusterParametersKey, CLUSREG_NAME_MAX_NODES, ®istryNodeLimit, NULL ); if (status != ERROR_SUCCESS) { registryNodeLimit = 0; } NmpAcquireLock(); if (NmpLockedEnterApi(NmStateOnline)) { DWORD retryCount = 0; //if this is the last node and it has been evicted //but the cleanup hasnt completed and hence the //service is up, then it should not entertain //any new join requests if (NmpLastNodeEvicted) { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] This node was evicted and hence is not in a valid state to process a " "request to add a node to the cluster.\n" ); status = ERROR_NODE_NOT_AVAILABLE; NmpLockedLeaveApi(); goto FnExit; } while (TRUE) { if (NmpLeaderNodeId == NmLocalNodeId) { // // This node is the leader, call the internal // handler directly. // status = NmpAddNode( lpszNodeName, dwNodeHighestVersion, dwNodeLowestVersion, dwNodeProductSuite, registryNodeLimit ); } else { // // Forward the request to the leader. // RPC_BINDING_HANDLE binding = Session[NmpLeaderNodeId]; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Forwarding request to add node '%1!ws!' " "to the cluster to the leader (node %!u!).\n", lpszNodeName, NmpLeaderNodeId ); CL_ASSERT(binding != NULL); NmpReleaseLock(); status = NmRpcAddNode( binding, lpszNodeName, dwNodeHighestVersion, dwNodeLowestVersion, dwNodeProductSuite ); NmpAcquireLock(); } // // Check for the error codes that indicate either that // another AddNode operation is in progress or that the // leadership is changing. We will retry in these cases. // if ( (status != ERROR_CLUSTER_JOIN_IN_PROGRESS) && (status != ERROR_NODE_NOT_AVAILABLE) ) { break; } // // Sleep for 3 seconds and try again. We will give up and // return the error after retrying for 2 minutes. // if (++retryCount > 40) { break; } ClRtlLogPrint(LOG_NOISE, "[NMJOIN] AddNode operation for node '%1!ws! 
delayed " "waiting for competing AddNode operation to complete.\n", lpszNodeName ); NmpReleaseLock(); Sleep(3000); NmpAcquireLock(); } // end while(TRUE) NmpLockedLeaveApi(); } else { ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] This system is not in a valid state to process a " "request to add a node to the cluster.\n" ); status = ERROR_NODE_NOT_AVAILABLE; } FnExit: NmpReleaseLock(); return(status); } // s_NmJoinAddNode3 // This is used by setup of all highest major versions post 1.0 error_status_t s_JoinAddNode2( IN handle_t IDL_handle, IN LPCWSTR lpszNodeName, IN DWORD dwNodeHighestVersion, IN DWORD dwNodeLowestVersion ) /*++ Routine Description: Adds a new node to the cluster database. Arguments: IDL_handle - RPC binding handle, not used. lpszNodeName - Supplies the name of the new node. Return Value: ERROR_SUCCESS if successful Win32 error code otherwise. Notes: This routine was defined in NT4-SP4. JoinAddNode3 is used by NT5. Since it is impossible to install clustering using the NT4-SP4 software, this routine should never be invoked. --*/ { CL_ASSERT(FALSE); return(ERROR_CLUSTER_INCOMPATIBLE_VERSIONS); } error_status_t s_JoinAddNode( IN handle_t IDL_handle, IN LPCWSTR lpszNodeName ) /*++ Routine Description: Adds a new node to the cluster database. Arguments: IDL_handle - RPC binding handle, not used. lpszNodeName - Supplies the name of the new node. Return Value: ERROR_SUCCESS if successful Win32 error code otherwise. Notes: This is the routine that NT4-SP3 setup invokes to add a new node to a cluster. The combination of NT4-SP3 and NT5 is not supported. --*/ { return(ERROR_CLUSTER_INCOMPATIBLE_VERSIONS); } // // The rest of the code is currently unused. // error_status_t s_NmRpcDeliverJoinMessage( IN handle_t IDL_handle, IN UCHAR * Message, IN DWORD MessageLength ) /*++ Routine Description: Server side of the RPC interface for delivering membership join messages. Arguments: IDL_handle - RPC binding handle, not used. buffer - Supplies a pointer to the message data. 
length - Supplies the length of the message data. Return Value: ERROR_SUCCESS --*/ { DWORD status = ERROR_SUCCESS; #ifdef MM_IN_CLUSNET ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Delivering join message to Clusnet.\n" ); status = ClusnetDeliverJoinMessage( NmClusnetHandle, Message, MessageLength ); #endif return(status); } #ifdef MM_IN_CLUSNET DWORD NmpSendJoinMessage( IN ULONG DestNodeMask, IN PVOID Message, IN ULONG MessageLength ) { DWORD status = ERROR_SUCCESS; CL_NODE_ID node; CL_ASSERT(NmMaxNodeId != ClusterInvalidNodeId); for ( node = ClusterMinNodeId; node <= NmMaxNodeId; node++, (DestNodeMask >>= 1) ) { if (DestNodeMask & 0x1) { if (node != NmLocalNodeId) { ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Sending join message to node %1!u!.\n", node ); status = NmRpcDeliverJoinMessage( Session[node->NodeId], Message, MessageLength ); if (status == RPC_S_CALL_FAILED_DNE) { // // Try again since the first call to a restarted // RPC server will fail. // status = NmRpcDeliverJoinMessage( Session[node->NodeId], Message, MessageLength ); } } else { ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Delivering join message to local node.\n" ); status = ClusnetDeliverJoinMessage( NmClusnetHandle, Message, MessageLength ); } if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_NOISE, "[NMJOIN] send of join message to node %1!u! failed, status %2!u!\n", node, status ); break; } } } return(status); } // NmpSendJoinMessage DWORD NmJoinNodeToCluster( CL_NODE_ID joinerNodeId ) { DWORD status; PVOID message = NULL; ULONG messageLength; ULONG destMask; CLUSNET_JOIN_PHASE phase; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Joining node %1!u! to the cluster.\n", joinerNodeId ); for (phase = ClusnetJoinPhase1; phase <= ClusnetJoinPhase4; phase++) { ClRtlLogPrint(LOG_NOISE, "[NMJOIN] JoinNode phase %1!u!\n", phase ); status = ClusnetJoinCluster( NmClusnetHandle, joinerNodeId, phase, NM_MM_JOIN_TIMEOUT, &message, &messageLength, &destMask ); if (status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_NOISE, "[NMJOIN] JoinNode phase %1!u! 
failed, status %2!u!\n", phase, status ); break; } status = NmpSendJoinMessage( destMask, message, messageLength ); if (status != ERROR_SUCCESS) { DWORD abortStatus; ClRtlLogPrint(LOG_NOISE, "[NMJOIN] send join message failed %1!u!, aborting join of node %2!u!.\n", status, joinerNodeId ); abortStatus = ClusnetJoinCluster( NmClusnetHandle, joinerNodeId, ClusnetJoinPhaseAbort, NM_MM_JOIN_TIMEOUT, &message, &messageLength, &destMask ); if (abortStatus == ERROR_SUCCESS) { (VOID) NmpSendJoinMessage( destMask, message, messageLength ); } break; } } if (message != NULL) { ClusnetEndJoinCluster(NmClusnetHandle, message); } return(status); } // NmJoinNodeToCluster #endif // MM_IN_CLUSNET