windows-nt/Source/XPSP1/NT/base/cluster/service/mm/jrgpos.h

439 lines
18 KiB
C
Raw Permalink Normal View History

2020-09-26 03:20:57 -05:00
#ifndef _JRGPOS_H_
#define _JRGPOS_H_
#ifdef __TANDEM
#pragma columns 79
#pragma page "jrgpos.h - T9050 - OS-specific declarations for Regroup Module"
#endif
/* @@@@@@ START COPYRIGHT @@@@@@
** Tandem Confidential: Need to Know only
** Copyright (c) 1995, Tandem Computers Incorporated
** Protected as an unpublished work.
** All Rights Reserved.
**
** The computer program listings, specifications, and documentation
** herein are the property of Tandem Computers Incorporated and shall
** not be reproduced, copied, disclosed, or used in whole or in part
** for any reason without the prior express written permission of
** Tandem Computers Incorporated.
**
** @@@@@@ END COPYRIGHT @@@@@@
**/
/*---------------------------------------------------------------------------
* This file (jrgpos.h) contains OS-specific declarations used by Regroup.
* Use appropriate #includes to pull in declarations from other native
* OS files.
*---------------------------------------------------------------------------*/
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/*
 * NSK build configuration.
 * The fixed-width integer types come from jmsgtyp.h. NIL_ and MAX_CPUS are
 * not defined in this header -- presumably they are supplied by the NSK
 * includes below; confirm.
 * node_t is typedef'd further down in this header, so the node-number
 * macros may only be expanded after that typedef has been seen.
 */
#ifdef NSK
#include <jmsgtyp.h> /* to get "uint8", "uint16" and "uint32" */
#include <dmem.h>
#include <dcpuctl.h>
#include <jmsglit.h>
#define RGP_NULL_PTR NIL_ /* Null pointer for use by RGP */
#define MAX_CLUSTER_SIZE MAX_CPUS /* max # of nodes supported in system */
#define LOWEST_NODENUM ((node_t)0) /* starting node number */
#define RGP_NULL_NODE ((node_t)-1) /* special node# for defaults */
#define RGP_KEY_NODE RGP_NULL_NODE /* No node is special */
#endif /* NSK */
/*
 * LCU, UNIX and NT build configuration.
 * On these platforms this header supplies the fixed-width integer typedefs
 * itself and maps RGP_NULL_PTR onto NULL.
 * NOTE(review): the typedefs assume unsigned short is 16 bits and unsigned
 * int is 32 bits on every supported compiler -- confirm.
 * node_t is typedef'd further down in this header, so the node-number
 * macros may only be expanded after that typedef has been seen.
 */
#if defined(LCU) || defined(UNIX) || defined(NT)
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
#ifndef NULL
#define NULL ((void *)0)
#endif /* NULL */
#define RGP_NULL_PTR NULL /* Null pointer for use by RGP */
/* LCU and UNIX: fixed 16-node cluster, node numbers start at 1. */
#if defined(LCU) || defined(UNIX)
#define MAX_CLUSTER_SIZE 16 /* max # of nodes supported in system */
#define LOWEST_NODENUM ((node_t)1) /* starting node number */
#endif
/*
 * NT: limits are taken from ClusterDefaultMaxNodes / ClusterMinNodeId, which
 * are not defined here -- presumably they come from service.h; confirm.
 * NOTE(review): SetCluster/GetCluster in this header touch only cluster[0]
 * and cluster[1] (16 node bits); confirm ClusterDefaultMaxNodes never
 * exceeds 16.
 */
#if defined(NT)
#include "service.h"
#define MAX_CLUSTER_SIZE ClusterDefaultMaxNodes
/* max # of nodes supported in system */
#define LOWEST_NODENUM ((node_t)ClusterMinNodeId) /* starting node number */
#endif
#define RGP_NULL_NODE ((node_t)-1)
/* special node# for defaults */
#define RGP_KEY_NODE RGP_NULL_NODE /* No node is special */
/* _priv and _resident are defined to nothing here, so they vanish from
 * the prototypes below on these platforms. */
#define _priv /* used only by NSK compilers */
#define _resident /* used only by NSK compilers */
#endif /* LCU || UNIX || NT */
/*---------------------------------------------------------------
* Unacknowledged packet subtypes used by regroup.
* These are made visible to the OS for reporting and counting
* purposes only.
*---------------------------------------------------------------*/
/* Maximum payload of packets sent by Regroup is 56 bytes.
 * This allows a maximum transport overhead of 8 bytes in the
 * ServerNet interrupt packet which has a size of 64 bytes.
 */
#define RGP_UNACK_PKTLEN 56 /*bytes*/
/*
 * Generic view of an unacknowledged Regroup packet: a one-byte subtype
 * followed by the subtype-specific bytes. The array length is chosen so
 * that the two members together occupy exactly RGP_UNACK_PKTLEN bytes
 * (1 + 55 = 56).
 */
typedef struct
{
uint8 pktsubtype; /* packet subtype code */
uint8 subtype_specific[RGP_UNACK_PKTLEN - sizeof(uint8)]; /* remaining payload bytes */
} rgp_unseq_pkt_t;
/*
 * Subtype codes for Regroup's unacknowledged packets.
 * Each expansion is a single parenthesized uint8 expression.
 */
#define RGP_UNACK_IAMALIVE  ((uint8)1)  /* "I am alive" packet */
#define RGP_UNACK_REGROUP   ((uint8)2)  /* regroup status packet */
#define RGP_UNACK_POISON    ((uint8)3)  /* poison packet */
/*
 * Processor status codes returned by the Service Processor (SP).
 */
#define RGP_NODE_UNREACHABLE    0
#define RGP_NODE_TIMED_OUT      1
#define RGP_NODE_FROZEN         2
#define RGP_NODE_HALTED         3
#define RGP_NODE_OPERATIONAL    4
/*----------------------*/
/* Processor halt codes */
/*----------------------*/
#ifdef NSK
#include <dhalt.h>
#endif /* NSK */
#if defined(LCU) || defined(UNIX) || defined(NT)
/*
 * Halt codes for platforms that do not get them from <dhalt.h>.
 *
 * RGP_PARIAH is the base of a contiguous range with one code per possible
 * node number: RGP_PARIAH_FIRST corresponds to LOWEST_NODENUM and
 * RGP_PARIAH_LAST to the highest of the MAX_CLUSTER_SIZE node numbers.
 * (Presumably the code used for node n is RGP_PARIAH + n -- confirm against
 * the callers.)
 *
 * The range macros are fully parenthesized so that they behave as single
 * values inside larger expressions, e.g. (code - RGP_PARIAH_FIRST).
 */
#define RGP_RELOADFAILED 1
#define RGP_INTERNAL_ERROR 2
#define RGP_MISSED_POLL_TO_SELF 3
#define RGP_AVOID_SPLIT_BRAIN 4
#define RGP_PRUNED_OUT 5
#define RGP_PARIAH 6
#define RGP_PARIAH_FIRST (RGP_PARIAH + LOWEST_NODENUM)
#define RGP_PARIAH_LAST (RGP_PARIAH_FIRST + MAX_CLUSTER_SIZE - 1)
#define RGP_ARBITRATION_FAILED 1000
#define RGP_ARBITRATION_STALLED 1001
#define RGP_SHUTDOWN_DURING_RGP 1002 // Alias of MM_STOP_REQUESTED in mmapi.h
#endif /* LCU || UNIX || NT */
/*-------------------------------------------------------------------------
* Timing parameters for Regroup. Of these, RGP_IAMALIVE_TICKS can be
* overridden at run time using the rgp_getrgpinfo(), rgp_setrgpinfo()
* routine pair. This is useful to slow node failure detection during
* kernel debug sessions.
*-------------------------------------------------------------------------*/
/*
 * Per-OS regroup clock settings.
 * NOTE: the unit of RGP_CLOCK_PERIOD is not the same on every platform.
 * NSK, LCU and UNIX express it in units of 10 milliseconds (30 -> 300 ms,
 * 100 -> 1 s), whereas NT expresses it directly in milliseconds (300 ms).
 */
#ifdef NSK
#define RGP_CLOCK_PERIOD 30 /* period between regroup ticks,
* in units of 10 milliseconds */
#define RGP_PFAIL_TICKS 16 /* # of regroup ticks after a
* power on event to forgive
* missing self IamAlives */
#endif /* NSK */
#ifdef LCU
#define RGP_CLOCK_PERIOD 30 /* period between regroup ticks,
* in units of 10 milliseconds */
#define RGP_PFAIL_TICKS 16 /* # of regroup ticks after a
* power on event to forgive
* missing self IamAlives */
#endif /* LCU */
#ifdef UNIX
#define RGP_CLOCK_PERIOD 100 /* period between regroup ticks,
* in units of 10 milliseconds */
#define RGP_PFAIL_TICKS 16 /* # of regroup ticks after a
* power on event to forgive
* missing self IamAlives */
#endif /* UNIX */
/* NT additionally defines a slower tick period for an inactive node. */
#ifdef NT
#define RGP_INACTIVE_PERIOD 60000 /* period between regroup ticks in ms when
* node is inactive. 1 minute period */
#define RGP_CLOCK_PERIOD 300 /* period between regroup ticks,
* in units of milliseconds */
#define RGP_PFAIL_TICKS 16 /* # of regroup ticks after a
* power on event to forgive
* missing self IamAlives */
#endif /* NT */
/* The following timing parameters can be overridden at run time by using
* the rgp_getrgpinfo(), rgp_setrgpinfo() routine pair.
*/
// Bug#328641
//
// Extend min_stage1 to approx 4 seconds to match NM values (check_ticks 2=>3)
// Extend connectivity ticks to 3 to 9 ticks
// adjust rgp_must_restart accordingly (20=>23) half conn_tick increase
//
/* Number of regroup clock ticks between successive IamAlives. */
#define RGP_IAMALIVE_TICKS      4

/* Regroup clock ticks before at least one IamAlive must have been received. */
#define RGP_CHECK_TICKS         3

/* Minimum stage 1 duration: the product of the two values above. */
#define RGP_MIN_STAGE1_TICKS    (RGP_IAMALIVE_TICKS * RGP_CHECK_TICKS)

/*
 * The parameters below are currently common to every OS; they can be made
 * OS-dependent if that is ever needed.
 */

/* Stage 2 must be entered after this many ticks, regardless of conditions. */
#define RGP_MUST_ENTER_STAGE2   32

/* Maximum number of ticks to wait in stage 2 to collect connectivity info. */
#define RGP_CONNECTIVITY_TICKS  9

/* Stall detector tick count: if no progress is made after this many ticks,
 * regroup is aborted and restarted. */
#define RGP_MUST_RESTART        23

/* Maximum number of restarts allowed per regroup incident; if this is
 * exceeded, the node halts. */
#define RGP_RESTART_MAX         3
/*--------------------------------------*/
/* Definition of node and cluster types */
/*--------------------------------------*/
/* Node number. Signed, so that RGP_NULL_NODE ((node_t)-1) is representable. */
typedef short node_t;
/* The cluster_t data type is a bit array with MAX_CLUSTER_SIZE
 * bits. It is implemented as an array of MAX_CLUSTER_SIZE/8
 * (rounded up) uint8s.
 * Because cluster_t is an array type, it decays to a pointer when passed
 * to a function and cannot be assigned with '='.
 */
#define BYTEL 8 /* number of bits in a uint8 */
/* Adding BYTEL - 1 before dividing rounds the byte count up. */
#define BYTES_IN_CLUSTER ((MAX_CLUSTER_SIZE + BYTEL - 1) / BYTEL)
typedef uint8 cluster_t [BYTES_IN_CLUSTER];
/************************************************************************
* rgp_msgsys_t (shared by regroup and message system)
* ---------------------------------------------------
* This structure is used by Regroup and the Message System to co-ordinate
* actions that are to be done by the Message System on behalf of Regroup.
* Regroup posts work requests in timer or IPC interrupt context and the
* message system performs these at appropriate times (from the
* dispatcher in NSK).
*
* ___________________________________________________________
* wd0 | flags (bitfields) | regroup_nodes |
* |_____________________________|_____________________________|
* wd1 | iamalive_nodes | poison_nodes |
* |_____________________________|_____________________________|
* wd2 | *regroup_data |
* |___________________________________________________________|
* wd3 | *iamalive_data |
* |___________________________________________________________|
* wd4 | *poison_data |
* |___________________________________________________________|
* wd5 | regroup_datalen |
* |___________________________________________________________|
* wd6 | iamalive_datalen |
* |___________________________________________________________|
* wd7 | poison_datalen |
* |___________________________________________________________|
*
*
* flags:
*
* sendrgppkts - have regroup status packets to send
* sendiamalives - have iamalive status packets to send
* sendpoisons - have poison packets to send
* phase1_cleanup - need to start phase1 cleanup due to node death
* phase2_cleanup - need to start phase2 cleanup due to node death
*
* regroup_nodes - mask of nodes to send regroup pkts to
* iamalive_nodes - mask of nodes to send iamalives to
* poison_nodes - mask of nodes to send poison pkts to
*
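* The following fields are compiled in for NSK, UNIX and NT builds only
* (not LCU). They are used in NSK and the user-level UNIX simulation;
* whether the NT build uses them is not evident from this header.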
*
* regroup_data - address of regroup pkt data to send
* iamalive_data - address of iamalive data to send
* poison_data - address of poison pkt data to send
*
* regroup_datalen - length of regroup pkt data to send
* iamalive_datalen - length of iamalive data to send
* poison_datalen - length of poison pkt data to send
*
*/
/*
 * Work-request structure shared between Regroup and the Message System.
 * The member order and sizes are a shared layout and must not be changed.
 * The six flag bits plus the 11-bit filler add up to 16 bits.
 */
#ifdef __TANDEM
#pragma fieldalign shared8 rgp_msgsys
#endif /* __TANDEM */
typedef struct rgp_msgsys
{
uint16 sendrgppkts : 1; /* have regroup status packets to send */
uint16 sendiamalives : 1; /* have iamalive status packets to send */
uint16 sendpoisons : 1; /* have poison packets to send */
uint16 phase1_cleanup : 1; /* need to start phase1 cleanup due to node death */
uint16 phase2_cleanup : 1; /* need to start phase2 cleanup due to node death */
uint16 filler : 11; /* pads the flag bits to 16 */
cluster_t regroup_nodes; /* mask of nodes to send regroup pkts to */
cluster_t iamalive_nodes; /* mask of nodes to send iamalives to */
cluster_t poison_nodes; /* mask of nodes to send poison pkts to */
/* The data pointers and lengths exist only in NSK, UNIX and NT builds. */
#if defined(NSK) || defined(UNIX) || defined(NT)
void *regroup_data; /* address of regroup pkt data to send */
void *iamalive_data; /* address of iamalive data to send */
void *poison_data; /* address of poison pkt data to send */
uint32 regroup_datalen; /* length of regroup pkt data to send */
uint32 iamalive_datalen; /* length of iamalive data to send */
uint32 poison_datalen; /* length of poison pkt data to send */
#endif /* NSK || UNIX || NT */
} rgp_msgsys_t;
/* Pointer to the shared work-request structure. */
typedef struct rgp_msgsys *rgp_msgsys_p;
/*----------------------------------------------------------------------
* OS-dependent routines used by Regroup.
*
* These are defined either in the regroup file srgpos.c or in other
* modules in the OS.
*----------------------------------------------------------------------*/
/*
 * Declarations only: nothing in this header defines these routines.
 * _priv and _resident are NSK compiler qualifiers; on LCU/UNIX/NT this
 * header defines them to nothing.
 */
_priv _resident extern void rgp_init_OS(void);
/* packet_subtype: presumably one of the RGP_UNACK_* codes -- confirm. */
_priv _resident extern void rgp_broadcast(uint8 packet_subtype);
_priv _resident extern void rgp_node_failed(node_t node);
_priv _resident extern void rgp_start_phase1_cleanup(void);
_priv _resident extern void rgp_start_phase2_cleanup(void);
_priv _resident extern void rgp_cleanup_complete(void);
_priv _resident extern void rgp_had_power_failure(node_t node);
/* Return value: presumably one of the RGP_NODE_* status codes -- confirm. */
_priv _resident extern int rgp_status_of_node(node_t node);
_priv _resident extern void rgp_newnode_online(node_t newnode);
/* cluster_choices is an array of num_clusters cluster masks. The meaning of
 * the int result is not visible here -- TODO confirm in the definition. */
_priv _resident extern int rgp_select_cluster(cluster_t cluster_choices[],
int num_clusters);
/* Same parameters as rgp_select_cluster plus an explicit key node. */
_priv _resident extern int rgp_select_cluster_ex(cluster_t cluster_choices[],
int num_clusters, node_t keynode);
_priv _resident extern void rgp_cleanup_OS(void);
/*
 * rgp_hold_all_io / rgp_resume_all_io.
 * On NSK these names are macros that expand to TSER_TRANSFER_PAUSE_ and
 * TSER_TRANSFER_CONTINUE_ (not defined in this header -- presumably they
 * come from the ts*.h includes below; confirm). On every other platform
 * they are ordinary external functions taking no arguments.
 */
#ifdef NSK
#include <tsrtnvl.h> /*F40:MB06452.1*/
#include <tsdevdf.h> /*F40:MB06452.2*/
#include <tsport.h> /*F40:MB06452.3*/
#include <tsentry.h> /*F40:MB06452.4*/
/*F40:MB06452.5*/
#define rgp_hold_all_io TSER_TRANSFER_PAUSE_ /*F40:MB064514.1*/
#define rgp_resume_all_io TSER_TRANSFER_CONTINUE_ /*F40:MB064514.2*/
/*F40:MB06452.8*/
#else
_priv _resident extern void rgp_hold_all_io(void);
_priv _resident extern void rgp_resume_all_io(void);
#endif /* NSK */
/*
 * SetCluster(cluster, mask)
 *
 * Stores a 16-bit node mask into a cluster_t: the high-order byte of the
 * mask goes to cluster[0] and the low-order byte to cluster[1]. Only these
 * two bytes are written, so the macro covers at most 16 nodes.
 *
 * Both arguments are parenthesized in the expansion, so an expression such
 * as (a | b) can be passed as the mask and a dereferenced pointer as the
 * cluster. Each argument is evaluated twice; do not pass expressions with
 * side effects.
 *
 * The expansion is deliberately kept as a plain brace block (rather than
 * do { } while (0)) so that existing call sites keep compiling whether or
 * not they put a semicolon after the call. As a consequence it must not be
 * used as the unbraced body of an if that is followed by an else.
 */
#define SetCluster(/* cluster_t */ cluster, /* uint16 */ mask) \
{ \
    (cluster)[0] = (uint8)((mask) >> 8); \
    (cluster)[1] = (uint8)((mask) & 0xFF); \
}
/*
 * GetCluster(cluster)
 *
 * Returns the 16-bit node mask held in a cluster_t, with cluster[0] as the
 * high-order byte and cluster[1] as the low-order byte (the inverse of
 * SetCluster). Only these two bytes are read. The argument is parenthesized
 * so that a dereferenced pointer (*p) indexes the intended array; it is
 * evaluated twice.
 */
#define GetCluster(/* cluster_t */ cluster) \
    (((uint16)(cluster)[0] << 8) | (uint16)(cluster)[1])
/*
 * RGP_MERGE_TO_32(c1, c2)
 *
 * Combines two cluster masks into one uint32 for tracing regroup events:
 * c1 occupies the upper 16 bits and c2 the lower 16 bits.
 * Each half is converted to uint32 before shifting. GetCluster yields a
 * promoted (signed) int, and shifting that left by 16 is undefined when
 * bit 15 of c1 is set.
 */
#define RGP_MERGE_TO_32( c1, c2 ) \
    ( ( (uint32) GetCluster( c1 ) << 16 ) | (uint32) GetCluster( c2 ) )
/*----------------------------------------------------------------------
* OS-dependent routines used by the Regroup module.
*
* These are defined in srgpos.c.
*----------------------------------------------------------------------*/
/* Routines to halt the node upon catastrophic errors.
 *
 * RGP_ERROR(halt_code) is the entry point used by callers.
 * On NSK it expands to SYSTEM_FREEZE_(halt_code). On every other platform
 * it expands to a call to RGP_ERROR_EX, passing the file name and line
 * number of the call site (__FILE__ / __LINE__).
 *
 * NOTE(review): DWORD is not defined in this header. On NT it presumably
 * comes from service.h; LCU/UNIX builds would need it from elsewhere --
 * confirm.
 * NOTE(review): fname receives the string literal __FILE__ through a
 * non-const char *; the callee must not write through it.
 */
#ifdef NSK
#include <dutil.h> /*F40:MB06458.3*/
#define RGP_ERROR(/* uint16 */ halt_code) SYSTEM_FREEZE_(halt_code)
#else
_priv _resident extern void RGP_ERROR_EX (uint16 halt_code, char* fname, DWORD lineno);
#define RGP_ERROR(halt_code) RGP_ERROR_EX(halt_code, __FILE__, __LINE__)
#endif /* NSK */
/*
 * Print routines declared for UNIX builds only.
 * NOTE(review): the empty parentheses are old-style declarations that leave
 * the parameter list unspecified (they do not mean "no arguments"), so the
 * compiler does not check the arguments at call sites. The real parameter
 * lists are not visible from this header.
 */
#ifdef UNIX
_priv _resident extern void PrintRegroupStart();
_priv _resident extern void PrintPruningResult();
_priv _resident extern void PrintStage();
_priv _resident extern void PrintMatrix();
#endif /* UNIX */
#ifdef __cplusplus
}
#endif /* __cplusplus */
#if 0
History of changes to this file:
-------------------------------------------------------------------------
1995, December 13 F40:KSK0610 /*F40:KSK06102.1*/
This file is part of the portable Regroup Module used in the NonStop
Kernel (NSK) and Loosely Coupled UNIX (LCU) operating systems. There
are 10 files in the module - jrgp.h, jrgpos.h, wrgp.h, wrgpos.h,
srgpif.c, srgpos.c, srgpsm.c, srgputl.c, srgpcli.c and srgpsvr.c.
The last two are simulation files to test the Regroup Module on a
UNIX workstation in user mode with processes simulating processor nodes
and UDP datagrams used to send unacknowledged datagrams.
This file was first submitted for release into NSK on 12/13/95.
------------------------------------------------------------------------------
This change occurred on 19 Jan 1996 /*F40:MB06458.4*/
Changes for phase IV Sierra message system release. Includes: /*F40:MB06458.5*/
- Some cleanup of the code /*F40:MB06458.6*/
- Increment KCCB counters to count the number of setup messages and /*F40:MB06458.7*/
unsequenced messages sent. /*F40:MB06458.8*/
- Fixed some bugs /*F40:MB06458.9*/
- Disable interrupts before allocating broadcast sibs. /*F40:MB06458.10*/
- Change per-packet-timeout to 5ms /*F40:MB06458.11*/
- Make the regroup and powerfail broadcast use highest priority /*F40:MB06458.12*/
tnet services queue. /*F40:MB06458.13*/
- Call the millicode backdoor to get the processor status from SP /*F40:MB06458.14*/
- Fixed expand bug in msg_listen_ and msg_readctrl_ /*F40:MB06458.15*/
- Added enhancement to msngr_sendmsg_ so that clients do not need /*F40:MB06458.16*/
to be unstoppable before calling this routine. /*F40:MB06458.17*/
- Added new steps in the build file called /*F40:MB06458.18*/
MSGSYS_C - compiles all the message system C files /*F40:MB06458.19*/
MSDRIVER - compiles all the MSDriver files /*F40:MB06458.20*/
REGROUP - compiles all the regroup files /*F40:MB06458.21*/
----------------------------------------------------------------------- /*F40:MB06458.22*/
#endif /* 0 - change descriptions */
#endif /* _JRGPOS_H_ defined */