381 lines
12 KiB
C
381 lines
12 KiB
C
/*++
|
||
|
||
Copyright (c) 1996 Microsoft Corporation
|
||
|
||
Module Name:
|
||
|
||
send.c
|
||
|
||
Abstract:
|
||
|
||
APIs for the client side of the checkpoint manager
|
||
|
||
Author:
|
||
|
||
John Vert (jvert) 1/14/1997
|
||
|
||
Revision History:
|
||
|
||
--*/
|
||
#include "cpp.h"
|
||
|
||
|
||
CL_NODE_ID
|
||
CppGetQuorumNodeId(
|
||
VOID
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Returns the node ID of the node owning the quorum resource.
|
||
|
||
Arguments:
|
||
|
||
None.
|
||
|
||
Return Value:
|
||
|
||
ERROR_SUCCESS if successful
|
||
|
||
Win32 error code otherwise
|
||
|
||
--*/
|
||
|
||
{
|
||
PFM_RESOURCE QuorumResource;
|
||
DWORD Status;
|
||
DWORD NodeId;
|
||
|
||
Status = FmFindQuorumResource(&QuorumResource);
|
||
if (Status != ERROR_SUCCESS) {
|
||
return((DWORD)-1);
|
||
}
|
||
|
||
NodeId = FmFindQuorumOwnerNodeId(QuorumResource);
|
||
OmDereferenceObject(QuorumResource);
|
||
|
||
return(NodeId);
|
||
}
|
||
|
||
|
||
|
||
DWORD
|
||
CpSaveDataFile(
|
||
IN PFM_RESOURCE Resource,
|
||
IN DWORD dwCheckpointId,
|
||
IN LPCWSTR lpszFileName,
|
||
IN BOOLEAN fCryptoCheckpoint
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This function checkpoints arbitrary data for the specified resource. The data is stored on the quorum
|
||
disk to ensure that it survives partitions in time. Any node in the cluster may save or retrieve
|
||
checkpointed data.
|
||
|
||
Arguments:
|
||
|
||
Resource - Supplies the resource associated with this data.
|
||
|
||
dwCheckpointId - Supplies the unique checkpoint ID describing this data. The caller is responsible
|
||
for ensuring the uniqueness of the checkpoint ID.
|
||
|
||
lpszFileName - Supplies the name of the file with the checkpoint data.
|
||
|
||
fCryptoCheckpoint - Indicates if the checkpoint is a crypto checkpoint.
|
||
|
||
Return Value:
|
||
|
||
ERROR_SUCCESS if successful
|
||
|
||
Win32 error code otherwise
|
||
|
||
--*/
|
||
|
||
{
|
||
CL_NODE_ID OwnerNode;
|
||
DWORD Status;
|
||
|
||
do {
|
||
OwnerNode = CppGetQuorumNodeId();
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[CP] CpSaveData: checkpointing data id %1!d! to quorum node %2!d!\n",
|
||
dwCheckpointId,
|
||
OwnerNode);
|
||
if (OwnerNode == NmLocalNodeId) {
|
||
Status = CppWriteCheckpoint(Resource,
|
||
dwCheckpointId,
|
||
lpszFileName,
|
||
fCryptoCheckpoint);
|
||
} else {
|
||
HANDLE hFile;
|
||
FILE_PIPE FilePipe;
|
||
hFile = CreateFileW(lpszFileName,
|
||
GENERIC_READ | GENERIC_WRITE,
|
||
0,
|
||
NULL,
|
||
OPEN_ALWAYS,
|
||
0,
|
||
NULL);
|
||
if (hFile == INVALID_HANDLE_VALUE) {
|
||
Status = GetLastError();
|
||
ClRtlLogPrint(LOG_CRITICAL,
|
||
"[CP] CpSaveData: failed to open data file %1!ws! error %2!d!\n",
|
||
lpszFileName,
|
||
Status);
|
||
} else {
|
||
DmInitFilePipe(&FilePipe, hFile);
|
||
try {
|
||
if (fCryptoCheckpoint) {
|
||
Status = CpDepositCryptoCheckpoint(Session[OwnerNode],
|
||
OmObjectId(Resource),
|
||
dwCheckpointId,
|
||
FilePipe.Pipe);
|
||
} else {
|
||
Status = CpDepositCheckpoint(Session[OwnerNode],
|
||
OmObjectId(Resource),
|
||
dwCheckpointId,
|
||
FilePipe.Pipe);
|
||
}
|
||
} except (I_RpcExceptionFilter(RpcExceptionCode())) {
|
||
ClRtlLogPrint(LOG_CRITICAL,
|
||
"[CP] CpSaveData - s_CpDepositCheckpoint from node %1!d! raised status %2!d!\n",
|
||
OwnerNode,
|
||
GetExceptionCode());
|
||
Status = ERROR_HOST_NODE_NOT_RESOURCE_OWNER;
|
||
}
|
||
DmFreeFilePipe(&FilePipe);
|
||
CloseHandle(hFile);
|
||
}
|
||
}
|
||
|
||
if (Status == ERROR_HOST_NODE_NOT_RESOURCE_OWNER) {
|
||
//
|
||
// This node no longer owns the quorum resource, retry.
|
||
//
|
||
ClRtlLogPrint(LOG_UNUSUAL,
|
||
"[CP] CpSaveData: quorum owner %1!d! no longer owner\n",
|
||
OwnerNode);
|
||
}
|
||
} while ( Status == ERROR_HOST_NODE_NOT_RESOURCE_OWNER );
|
||
return(Status);
|
||
}
|
||
|
||
DWORD
|
||
CpDeleteCheckpointFile(
|
||
IN PFM_RESOURCE Resource,
|
||
IN DWORD dwCheckpointId,
|
||
IN OPTIONAL LPCWSTR lpszQuorumPath
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This function removes the checkpoint file correspoinding to the
|
||
checkpoint id for a given resource from the given directory.
|
||
|
||
Arguments:
|
||
|
||
Resource - Supplies the resource associated with this data.
|
||
|
||
dwCheckpointId - Supplies the unique checkpoint ID describing this data. The caller is responsible
|
||
for ensuring the uniqueness of the checkpoint ID.
|
||
|
||
lpszQuorumPath - Supplies the path of the cluster files on a quorum device.
|
||
|
||
Return Value:
|
||
|
||
ERROR_SUCCESS if successful
|
||
|
||
Win32 error code otherwise
|
||
|
||
--*/
|
||
|
||
{
|
||
CL_NODE_ID OwnerNode;
|
||
DWORD Status;
|
||
|
||
do {
|
||
OwnerNode = CppGetQuorumNodeId();
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[CP] CpDeleteDataFile: removing checkpoint file for id %1!d! at quorum node %2!d!\n",
|
||
dwCheckpointId,
|
||
OwnerNode);
|
||
if (OwnerNode == NmLocalNodeId)
|
||
{
|
||
Status = CppDeleteCheckpointFile(Resource, dwCheckpointId, lpszQuorumPath);
|
||
}
|
||
else
|
||
{
|
||
Status = CpDeleteCheckpoint(Session[OwnerNode],
|
||
OmObjectId(Resource),
|
||
dwCheckpointId,
|
||
lpszQuorumPath);
|
||
|
||
//talking to an old server, cant perform this function
|
||
//ignore the error
|
||
if (Status == RPC_S_PROCNUM_OUT_OF_RANGE)
|
||
Status = ERROR_SUCCESS;
|
||
}
|
||
|
||
if (Status == ERROR_HOST_NODE_NOT_RESOURCE_OWNER) {
|
||
//
|
||
// This node no longer owns the quorum resource, retry.
|
||
//
|
||
ClRtlLogPrint(LOG_UNUSUAL,
|
||
"[CP] CpSaveData: quorum owner %1!d! no longer owner\n",
|
||
OwnerNode);
|
||
}
|
||
} while ( Status == ERROR_HOST_NODE_NOT_RESOURCE_OWNER );
|
||
return(Status);
|
||
}
|
||
|
||
|
||
|
||
DWORD
|
||
CpGetDataFile(
|
||
IN PFM_RESOURCE Resource,
|
||
IN DWORD dwCheckpointId,
|
||
IN LPCWSTR lpszFileName,
|
||
IN BOOLEAN fCryptoCheckpoint
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This function retrieves checkpoint data for the specified resource. The data must
|
||
have been saved by CpSaveData. Any node in the cluster may save or retrieve
|
||
checkpointed data.
|
||
|
||
Arguments:
|
||
|
||
Resource - Supplies the resource associated with this data.
|
||
|
||
dwCheckpointId - Supplies the unique checkpoint ID describing this data. The caller is
|
||
responsible for ensuring the uniqueness of the checkpoint ID.
|
||
|
||
lpszFileName - Supplies the filename where the data should be retrieved.
|
||
|
||
fCryptoCheckpoint - Indicates if the checkpoint is a crypto checkpoint.
|
||
|
||
Return Value:
|
||
|
||
ERROR_SUCCESS if successful
|
||
|
||
Win32 error code otherwise
|
||
|
||
--*/
|
||
|
||
{
|
||
CL_NODE_ID OwnerNode;
|
||
DWORD Status;
|
||
DWORD Count = 60;
|
||
|
||
RetryRetrieveChkpoint:
|
||
OwnerNode = CppGetQuorumNodeId();
|
||
ClRtlLogPrint(LOG_NOISE,
|
||
"[CP] CpGetDataFile: restoring data id %1!d! from quorum node %2!d!\n",
|
||
dwCheckpointId,
|
||
OwnerNode);
|
||
if (OwnerNode == NmLocalNodeId) {
|
||
Status = CppReadCheckpoint(Resource,
|
||
dwCheckpointId,
|
||
lpszFileName,
|
||
fCryptoCheckpoint);
|
||
} else {
|
||
HANDLE hFile;
|
||
FILE_PIPE FilePipe;
|
||
|
||
hFile = CreateFileW(lpszFileName,
|
||
GENERIC_READ | GENERIC_WRITE,
|
||
0,
|
||
NULL,
|
||
CREATE_ALWAYS,
|
||
0,
|
||
NULL);
|
||
if (hFile == INVALID_HANDLE_VALUE) {
|
||
Status = GetLastError();
|
||
ClRtlLogPrint(LOG_CRITICAL,
|
||
"[CP] CpGetDataFile: failed to create new file %1!ws! error %2!d!\n",
|
||
lpszFileName,
|
||
Status);
|
||
} else {
|
||
DmInitFilePipe(&FilePipe, hFile);
|
||
try {
|
||
if (fCryptoCheckpoint) {
|
||
Status = CpRetrieveCryptoCheckpoint(Session[OwnerNode],
|
||
OmObjectId(Resource),
|
||
dwCheckpointId,
|
||
FilePipe.Pipe);
|
||
} else {
|
||
Status = CpRetrieveCheckpoint(Session[OwnerNode],
|
||
OmObjectId(Resource),
|
||
dwCheckpointId,
|
||
FilePipe.Pipe);
|
||
}
|
||
} except (I_RpcExceptionFilter(RpcExceptionCode())) {
|
||
ClRtlLogPrint(LOG_CRITICAL,
|
||
"[CP] CpGetData - s_CpRetrieveCheckpoint from node %1!d! raised status %2!d!\n",
|
||
OwnerNode,
|
||
GetExceptionCode());
|
||
CL_UNEXPECTED_ERROR( GetExceptionCode() );
|
||
Status = ERROR_HOST_NODE_NOT_RESOURCE_OWNER;
|
||
}
|
||
DmFreeFilePipe(&FilePipe);
|
||
CloseHandle(hFile);
|
||
}
|
||
}
|
||
if (Status == ERROR_HOST_NODE_NOT_RESOURCE_OWNER) {
|
||
//
|
||
// This node no longer owns the quorum resource, retry.
|
||
//
|
||
ClRtlLogPrint(LOG_UNUSUAL,
|
||
"[CP] CpGetData: quorum owner %1!d! no longer owner\n",
|
||
OwnerNode);
|
||
goto RetryRetrieveChkpoint;
|
||
}
|
||
else if ((Status == ERROR_ACCESS_DENIED) ||
|
||
(Status == ERROR_INVALID_FUNCTION) ||
|
||
(Status == ERROR_NOT_READY) ||
|
||
(Status == RPC_X_INVALID_PIPE_OPERATION) ||
|
||
(Status == ERROR_BUSY) ||
|
||
(Status == ERROR_SWAPERROR))
|
||
{
|
||
//if the quorum resource offline suddenly
|
||
ClRtlLogPrint(LOG_UNUSUAL,
|
||
"[CP] CpGetData: quorum resource went offline in the middle, Count=%1!u!\n",
|
||
Count);
|
||
//we dont prevent the quorum resource from going offline if some resource
|
||
//is blocked for a long time in its online/offline thread- this is because
|
||
//some resources(like dtc)try to enumerate resources in the quorum group
|
||
//we increase the timeout to give cp a chance to retrieve the checkpoint
|
||
//while the quorum group is being moved or failed over
|
||
if (Count--)
|
||
{
|
||
Sleep(1000);
|
||
goto RetryRetrieveChkpoint;
|
||
}
|
||
}
|
||
|
||
if (Status != ERROR_SUCCESS) {
|
||
WCHAR string[16];
|
||
|
||
wsprintfW(&(string[0]), L"%u", Status);
|
||
|
||
ClRtlLogPrint(LOG_CRITICAL,
|
||
"[CP] CpGetDataFile - failed to retrieve checkpoint %1!d! error %2!d!\n",
|
||
dwCheckpointId,
|
||
Status);
|
||
CL_LOGCLUSERROR2(CP_RESTORE_REGISTRY_FAILURE, OmObjectName(Resource), string);
|
||
#if DBG
|
||
if (IsDebuggerPresent())
|
||
DebugBreak();
|
||
#endif
|
||
|
||
}
|
||
|
||
return(Status);
|
||
}
|